Exemple #1
0
def all_nonperiodic_features(times,
                             mags,
                             errs,
                             magsarefluxes=False,
                             stetson_weightbytimediff=True):
    '''
    This rolls up the functions above and returns a single dict.

    NOTE: this doesn't calculate the CDPP; that's a separate function.

    '''

    # remove nans first
    finiteind = npisfinite(times) & npisfinite(mags) & npisfinite(errs)
    ftimes, fmags, ferrs = times[finiteind], mags[finiteind], errs[finiteind]

    # remove zero errors
    nzind = npnonzero(ferrs)
    ftimes, fmags, ferrs = ftimes[nzind], fmags[nzind], ferrs[nzind]

    xfeatures = nonperiodic_lightcurve_features(times,
                                                mags,
                                                errs,
                                                magsarefluxes=magsarefluxes)
    stetj = stetson_jindex(ftimes,
                           fmags,
                           ferrs,
                           weightbytimediff=stetson_weightbytimediff)
    stetk = stetson_kindex(fmags, ferrs)

    xfeatures.update({'stetsonj': stetj, 'stetsonk': stetk})

    return xfeatures
Exemple #2
0
def all_nonperiodic_features(times,
                             mags,
                             errs,
                             magsarefluxes=False,
                             stetson_weightbytimediff=True):
    '''This rolls up the feature functions above and returns a single dict.

    NOTE: this doesn't calculate the CDPP to save time since binning and
    smoothing takes a while for dense light curves.

    Parameters
    ----------

    times,mags,errs : np.array
        The input mag/flux time-series to calculate CDPP for.

    magsarefluxes : bool
        If True, indicates `mags` is actually an array of flux values.

    stetson_weightbytimediff : bool
        If this is True, the Stetson index for any pair of mags will be
        reweighted by the difference in times between them using the scheme in
        Fruth+ 2012 and Zhange+ 2003 (as seen in Sokolovsky+ 2017)::

            w_i = exp(- (t_i+1 - t_i)/ delta_t )

    Returns
    -------

    dict
        Returns a dict with all of the variability features.

    '''

    # remove nans first
    finiteind = npisfinite(times) & npisfinite(mags) & npisfinite(errs)
    ftimes, fmags, ferrs = times[finiteind], mags[finiteind], errs[finiteind]

    # remove zero errors
    nzind = npnonzero(ferrs)
    ftimes, fmags, ferrs = ftimes[nzind], fmags[nzind], ferrs[nzind]

    xfeatures = nonperiodic_lightcurve_features(times,
                                                mags,
                                                errs,
                                                magsarefluxes=magsarefluxes)
    stetj = stetson_jindex(ftimes,
                           fmags,
                           ferrs,
                           weightbytimediff=stetson_weightbytimediff)
    stetk = stetson_kindex(fmags, ferrs)

    xfeatures.update({'stetsonj': stetj, 'stetsonk': stetk})

    return xfeatures
Exemple #3
0
def aov_periodfind(times,
                   mags,
                   errs,
                   magsarefluxes=False,
                   startp=None,
                   endp=None,
                   stepsize=1.0e-4,
                   autofreq=True,
                   normalize=True,
                   phasebinsize=0.05,
                   mindetperbin=9,
                   nbestpeaks=5,
                   periodepsilon=0.1,
                   sigclip=10.0,
                   nworkers=None,
                   verbose=True):
    '''This runs a parallelized Analysis-of-Variance (AoV) period search.

    NOTE: `normalize = True` here as recommended by Schwarzenberg-Czerny 1996,
    i.e. mags will be normalized to zero and rescaled so their variance = 1.0.

    Parameters
    ----------

    times,mags,errs : np.array
        The mag/flux time-series with associated measurement errors to run the
        period-finding on.

    magsarefluxes : bool
        If the input measurement values in `mags` and `errs` are in fluxes, set
        this to True.

    startp,endp : float or None
        The minimum and maximum periods to consider for the transit search.

    stepsize : float
        The step-size in frequency to use when constructing a frequency grid for
        the period search.

    autofreq : bool
        If this is True, the value of `stepsize` will be ignored and the
        :py:func:`astrobase.periodbase.get_frequency_grid` function will be used
        to generate a frequency grid based on `startp`, and `endp`. If these are
        None as well, `startp` will be set to 0.1 and `endp` will be set to
        `times.max() - times.min()`.

    normalize : bool
        This sets if the input time-series is normalized to 0.0 and rescaled
        such that its variance = 1.0. This is the recommended procedure by
        Schwarzenberg-Czerny 1996.

    phasebinsize : float
        The bin size in phase to use when calculating the AoV theta statistic at
        a test frequency.

    mindetperbin : int
        The minimum number of elements in a phase bin to consider it valid when
        calculating the AoV theta statistic at a test frequency.

    nbestpeaks : int
        The number of 'best' peaks to return from the periodogram results,
        starting from the global maximum of the periodogram peak values.

    periodepsilon : float
        The fractional difference between successive values of 'best' periods
        when sorting by periodogram power to consider them as separate periods
        (as opposed to part of the same periodogram peak). This is used to avoid
        broad peaks in the periodogram and make sure the 'best' periods returned
        are all actually independent.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed using
        the number provided as the sigma-multiplier to cut out from the input
        time-series.

        If a list of two ints/floats is provided, the function will perform an
        'asymmetric' sigma-clip. The first element in this list is the sigma
        value to use for fainter flux/mag values; the second element in this
        list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    nworkers : int
        The number of parallel workers to use when calculating the periodogram.

    verbose : bool
        If this is True, will indicate progress and details about the frequency
        grid used for the period search.

    Returns
    -------

    dict
        This function returns a dict, referred to as an `lspinfo` dict in other
        astrobase functions that operate on periodogram results. This is a
        standardized format across all astrobase period-finders, and is of the
        form below::

            {'bestperiod': the best period value in the periodogram,
             'bestlspval': the periodogram peak associated with the best period,
             'nbestpeaks': the input value of nbestpeaks,
             'nbestlspvals': nbestpeaks-size list of best period peak values,
             'nbestperiods': nbestpeaks-size list of best periods,
             'lspvals': the full array of periodogram powers,
             'periods': the full array of periods considered,
             'method':'aov' -> the name of the period-finder method,
             'kwargs':{ dict of all of the input kwargs for record-keeping}}

    '''

    # get rid of nans first and sigclip
    stimes, smags, serrs = sigclip_magseries(times,
                                             mags,
                                             errs,
                                             magsarefluxes=magsarefluxes,
                                             sigclip=sigclip)

    # make sure there are enough points to calculate a spectrum
    if len(stimes) > 9 and len(smags) > 9 and len(serrs) > 9:

        # get the frequencies to use
        if startp:
            endf = 1.0/startp
        else:
            # default start period is 0.1 day
            endf = 1.0/0.1

        if endp:
            startf = 1.0/endp
        else:
            # default end period is length of time series
            startf = 1.0/(stimes.max() - stimes.min())

        # if we're not using autofreq, then use the provided frequencies
        if not autofreq:
            frequencies = nparange(startf, endf, stepsize)
            if verbose:
                LOGINFO(
                    'using %s frequency points, start P = %.3f, end P = %.3f' %
                    (frequencies.size, 1.0/endf, 1.0/startf)
                )
        else:
            # this gets an automatic grid of frequencies to use
            frequencies = get_frequency_grid(stimes,
                                             minfreq=startf,
                                             maxfreq=endf)
            if verbose:
                LOGINFO(
                    'using autofreq with %s frequency points, '
                    'start P = %.3f, end P = %.3f' %
                    (frequencies.size,
                     1.0/frequencies.max(),
                     1.0/frequencies.min())
                )

        # map to parallel workers
        if (not nworkers) or (nworkers > NCPUS):
            nworkers = NCPUS
            if verbose:
                LOGINFO('using %s workers...' % nworkers)

        pool = Pool(nworkers)

        # renormalize the working mags to zero and scale them so that the
        # variance = 1 for use with our LSP functions
        if normalize:
            nmags = (smags - npmedian(smags))/npstd(smags)
        else:
            nmags = smags

        tasks = [(stimes, nmags, serrs, x, phasebinsize, mindetperbin)
                 for x in frequencies]

        lsp = pool.map(_aov_worker, tasks)

        pool.close()
        pool.join()
        del pool

        lsp = nparray(lsp)
        periods = 1.0/frequencies

        # find the nbestpeaks for the periodogram: 1. sort the lsp array by
        # highest value first 2. go down the values until we find five
        # values that are separated by at least periodepsilon in period

        # make sure to filter out non-finite values
        finitepeakind = npisfinite(lsp)
        finlsp = lsp[finitepeakind]
        finperiods = periods[finitepeakind]

        # make sure that finlsp has finite values before we work on it
        try:

            bestperiodind = npargmax(finlsp)

        except ValueError:

            LOGERROR('no finite periodogram values '
                     'for this mag series, skipping...')
            return {'bestperiod':npnan,
                    'bestlspval':npnan,
                    'nbestpeaks':nbestpeaks,
                    'nbestlspvals':None,
                    'nbestperiods':None,
                    'lspvals':None,
                    'periods':None,
                    'method':'aov',
                    'kwargs':{'startp':startp,
                              'endp':endp,
                              'stepsize':stepsize,
                              'normalize':normalize,
                              'phasebinsize':phasebinsize,
                              'mindetperbin':mindetperbin,
                              'autofreq':autofreq,
                              'periodepsilon':periodepsilon,
                              'nbestpeaks':nbestpeaks,
                              'sigclip':sigclip}}

        sortedlspind = npargsort(finlsp)[::-1]
        sortedlspperiods = finperiods[sortedlspind]
        sortedlspvals = finlsp[sortedlspind]

        # now get the nbestpeaks
        nbestperiods, nbestlspvals, peakcount = (
            [finperiods[bestperiodind]],
            [finlsp[bestperiodind]],
            1
        )
        prevperiod = sortedlspperiods[0]

        # find the best nbestpeaks in the lsp and their periods
        for period, lspval in zip(sortedlspperiods, sortedlspvals):

            if peakcount == nbestpeaks:
                break
            perioddiff = abs(period - prevperiod)
            bestperiodsdiff = [abs(period - x) for x in nbestperiods]

            # print('prevperiod = %s, thisperiod = %s, '
            #       'perioddiff = %s, peakcount = %s' %
            #       (prevperiod, period, perioddiff, peakcount))

            # this ensures that this period is different from the last
            # period and from all the other existing best periods by
            # periodepsilon to make sure we jump to an entire different peak
            # in the periodogram
            if (perioddiff > (periodepsilon*prevperiod) and
                all(x > (periodepsilon*period) for x in bestperiodsdiff)):
                nbestperiods.append(period)
                nbestlspvals.append(lspval)
                peakcount = peakcount + 1

            prevperiod = period


        return {'bestperiod':finperiods[bestperiodind],
                'bestlspval':finlsp[bestperiodind],
                'nbestpeaks':nbestpeaks,
                'nbestlspvals':nbestlspvals,
                'nbestperiods':nbestperiods,
                'lspvals':lsp,
                'periods':periods,
                'method':'aov',
                'kwargs':{'startp':startp,
                          'endp':endp,
                          'stepsize':stepsize,
                          'normalize':normalize,
                          'phasebinsize':phasebinsize,
                          'mindetperbin':mindetperbin,
                          'autofreq':autofreq,
                          'periodepsilon':periodepsilon,
                          'nbestpeaks':nbestpeaks,
                          'sigclip':sigclip}}

    else:

        LOGERROR('no good detections for these times and mags, skipping...')
        return {'bestperiod':npnan,
                'bestlspval':npnan,
                'nbestpeaks':nbestpeaks,
                'nbestlspvals':None,
                'nbestperiods':None,
                'lspvals':None,
                'periods':None,
                'method':'aov',
                'kwargs':{'startp':startp,
                          'endp':endp,
                          'stepsize':stepsize,
                          'normalize':normalize,
                          'phasebinsize':phasebinsize,
                          'mindetperbin':mindetperbin,
                          'autofreq':autofreq,
                          'periodepsilon':periodepsilon,
                          'nbestpeaks':nbestpeaks,
                          'sigclip':sigclip}}
Exemple #4
0
def nonperiodic_lightcurve_features(times, mags, errs, magsarefluxes=False):
    '''This calculates the following nonperiodic features of the light curve,
    listed in Richards, et al. 2011):

    - amplitude
    - beyond1std
    - flux_percentile_ratio_mid20
    - flux_percentile_ratio_mid35
    - flux_percentile_ratio_mid50
    - flux_percentile_ratio_mid65
    - flux_percentile_ratio_mid80
    - linear_trend
    - max_slope
    - median_absolute_deviation
    - median_buffer_range_percentage
    - pair_slope_trend
    - percent_amplitude
    - percent_difference_flux_percentile
    - skew
    - stdev
    - timelength
    - mintime
    - maxtime

    Parameters
    ----------

    times,mags,errs : np.array
        The input mag/flux time-series to process.

    magsarefluxes : bool
        If True, will treat values in `mags` as fluxes instead of magnitudes.

    Returns
    -------

    dict
        A dict containing all of the features listed above.

    '''

    # remove nans first
    finiteind = npisfinite(times) & npisfinite(mags) & npisfinite(errs)
    ftimes, fmags, ferrs = times[finiteind], mags[finiteind], errs[finiteind]

    # remove zero errors
    nzind = npnonzero(ferrs)
    ftimes, fmags, ferrs = ftimes[nzind], fmags[nzind], ferrs[nzind]

    ndet = len(fmags)

    if ndet > 9:

        # calculate the moments
        moments = lightcurve_moments(ftimes, fmags, ferrs)

        # calculate the flux measures
        fluxmeasures = lightcurve_flux_measures(ftimes,
                                                fmags,
                                                ferrs,
                                                magsarefluxes=magsarefluxes)

        # calculate the point-to-point measures
        ptpmeasures = lightcurve_ptp_measures(ftimes, fmags, ferrs)

        # get the length in time
        mintime, maxtime = npmin(ftimes), npmax(ftimes)
        timelength = maxtime - mintime

        # get the amplitude
        series_amplitude = 0.5 * (npmax(fmags) - npmin(fmags))

        # calculate the linear fit to the entire mag series
        fitcoeffs = nppolyfit(ftimes, fmags, 1, w=1.0 / (ferrs * ferrs))
        series_linear_slope = fitcoeffs[1]

        # roll fmags by 1
        rolled_fmags = nproll(fmags, 1)

        # calculate the magnitude ratio (from the WISE paper)
        series_magratio = ((npmax(fmags) - moments['median']) /
                           (npmax(fmags) - npmin(fmags)))

        # this is the dictionary returned containing all the measures
        measures = {
            'ndet': fmags.size,
            'mintime': mintime,
            'maxtime': maxtime,
            'timelength': timelength,
            'amplitude': series_amplitude,
            'ndetobslength_ratio': ndet / timelength,
            'linear_fit_slope': series_linear_slope,
            'magnitude_ratio': series_magratio,
        }
        if moments:
            measures.update(moments)
        if ptpmeasures:
            measures.update(ptpmeasures)
        if fluxmeasures:
            measures.update(fluxmeasures)

        return measures

    else:

        LOGERROR('not enough detections in this magseries '
                 'to calculate non-periodic features')
        return None
Exemple #5
0
def bls_serial_pfind(
        times,
        mags,
        errs,
        magsarefluxes=False,
        startp=0.1,  # search from 0.1 d to...
        endp=100.0,  # ... 100.0 d -- don't search full timebase
        stepsize=5.0e-4,
        mintransitduration=0.01,  # minimum transit length in phase
        maxtransitduration=0.8,  # maximum transit length in phase
        nphasebins=200,
        autofreq=True,  # figure out f0, nf, and df automatically
        periodepsilon=0.1,
        nbestpeaks=5,
        sigclip=10.0,
        verbose=True):
    '''Runs the Box Least Squares Fitting Search for transit-shaped signals.

    Based on eebls.f from Kovacs et al. 2002 and python-bls from Foreman-Mackey
    et al. 2015. This is the serial version (which is good enough in most cases
    because BLS in Fortran is fairly fast). If nfreq > 5e5, this will take a
    while.

    '''

    # get rid of nans first and sigclip
    stimes, smags, serrs = sigclip_magseries(times,
                                             mags,
                                             errs,
                                             magsarefluxes=magsarefluxes,
                                             sigclip=sigclip)

    # make sure there are enough points to calculate a spectrum
    if len(stimes) > 9 and len(smags) > 9 and len(serrs) > 9:

        # if we're setting up everything automatically
        if autofreq:

            # figure out the best number of phasebins to use
            nphasebins = int(np.ceil(2.0 / mintransitduration))

            # use heuristic to figure out best timestep
            stepsize = 0.25 * mintransitduration / (stimes.max() -
                                                    stimes.min())

            # now figure out the frequencies to use
            minfreq = 1.0 / endp
            maxfreq = 1.0 / startp
            nfreq = int(np.ceil((maxfreq - minfreq) / stepsize))

            # say what we're using
            if verbose:
                LOGINFO('min P: %s, max P: %s, nfreq: %s, '
                        'minfreq: %s, maxfreq: %s' %
                        (startp, endp, nfreq, minfreq, maxfreq))
                LOGINFO('autofreq = True: using AUTOMATIC values for '
                        'freq stepsize: %s, nphasebins: %s, '
                        'min transit duration: %s, max transit duration: %s' %
                        (stepsize, nphasebins, mintransitduration,
                         maxtransitduration))

        else:

            minfreq = 1.0 / endp
            maxfreq = 1.0 / startp
            nfreq = int(np.ceil((maxfreq - minfreq) / stepsize))

            # say what we're using
            if verbose:
                LOGINFO('min P: %s, max P: %s, nfreq: %s, '
                        'minfreq: %s, maxfreq: %s' %
                        (startp, endp, nfreq, minfreq, maxfreq))
                LOGINFO('autofreq = False: using PROVIDED values for '
                        'freq stepsize: %s, nphasebins: %s, '
                        'min transit duration: %s, max transit duration: %s' %
                        (stepsize, nphasebins, mintransitduration,
                         maxtransitduration))

        if nfreq > 5.0e5:

            if verbose:
                LOGWARNING('more than 5.0e5 frequencies to go through; '
                           'this will take a while. '
                           'you might want to use the '
                           'periodbase.bls_parallel_pfind function instead')

        if minfreq < (1.0 / (stimes.max() - stimes.min())):

            if verbose:
                LOGWARNING('the requested max P = %.3f is larger than '
                           'the time base of the observations = %.3f, '
                           ' will make minfreq = 2 x 1/timebase' %
                           (endp, stimes.max() - stimes.min()))
            minfreq = 2.0 / (stimes.max() - stimes.min())
            if verbose:
                LOGINFO('new minfreq: %s, maxfreq: %s' % (minfreq, maxfreq))

        # run BLS
        try:

            blsresult = _bls_runner(stimes, smags, nfreq, minfreq, stepsize,
                                    nphasebins, mintransitduration,
                                    maxtransitduration)

            # find the peaks in the BLS. this uses wavelet transforms to
            # smooth the spectrum and find peaks. a similar thing would be
            # to do a convolution with a gaussian kernel or a tophat
            # function, calculate d/dx(result), then get indices where this
            # is zero
            # blspeakinds = find_peaks_cwt(blsresults['power'],
            #                              nparray([2.0,3.0,4.0,5.0]))

            frequencies = minfreq + nparange(nfreq) * stepsize
            periods = 1.0 / frequencies
            lsp = blsresult['power']

            # find the nbestpeaks for the periodogram: 1. sort the lsp array
            # by highest value first 2. go down the values until we find
            # five values that are separated by at least periodepsilon in
            # period
            # make sure to get only the finite peaks in the periodogram
            # this is needed because BLS may produce infs for some peaks
            finitepeakind = npisfinite(lsp)
            finlsp = lsp[finitepeakind]
            finperiods = periods[finitepeakind]

            # make sure that finlsp has finite values before we work on it
            try:

                bestperiodind = npargmax(finlsp)

            except ValueError:

                LOGERROR('no finite periodogram values '
                         'for this mag series, skipping...')
                return {
                    'bestperiod': npnan,
                    'bestlspval': npnan,
                    'nbestpeaks': nbestpeaks,
                    'nbestlspvals': None,
                    'nbestperiods': None,
                    'lspvals': None,
                    'periods': None,
                    'method': 'bls',
                    'kwargs': {
                        'startp': startp,
                        'endp': endp,
                        'stepsize': stepsize,
                        'mintransitduration': mintransitduration,
                        'maxtransitduration': maxtransitduration,
                        'nphasebins': nphasebins,
                        'autofreq': autofreq,
                        'periodepsilon': periodepsilon,
                        'nbestpeaks': nbestpeaks,
                        'sigclip': sigclip
                    }
                }

            sortedlspind = np.argsort(finlsp)[::-1]
            sortedlspperiods = finperiods[sortedlspind]
            sortedlspvals = finlsp[sortedlspind]

            prevbestlspval = sortedlspvals[0]
            # now get the nbestpeaks
            nbestperiods, nbestlspvals, peakcount = ([
                finperiods[bestperiodind]
            ], [finlsp[bestperiodind]], 1)
            prevperiod = sortedlspperiods[0]

            # find the best nbestpeaks in the lsp and their periods
            for period, lspval in zip(sortedlspperiods, sortedlspvals):

                if peakcount == nbestpeaks:
                    break
                perioddiff = abs(period - prevperiod)
                bestperiodsdiff = [abs(period - x) for x in nbestperiods]

                # print('prevperiod = %s, thisperiod = %s, '
                #       'perioddiff = %s, peakcount = %s' %
                #       (prevperiod, period, perioddiff, peakcount))

                # this ensures that this period is different from the last
                # period and from all the other existing best periods by
                # periodepsilon to make sure we jump to an entire different
                # peak in the periodogram
                if (perioddiff > (periodepsilon * prevperiod)
                        and all(x > (periodepsilon * prevperiod)
                                for x in bestperiodsdiff)):
                    nbestperiods.append(period)
                    nbestlspvals.append(lspval)
                    peakcount = peakcount + 1

                prevperiod = period

            # generate the return dict
            resultdict = {
                'bestperiod': finperiods[bestperiodind],
                'bestlspval': finlsp[bestperiodind],
                'nbestpeaks': nbestpeaks,
                'nbestlspvals': nbestlspvals,
                'nbestperiods': nbestperiods,
                'lspvals': lsp,
                'frequencies': frequencies,
                'periods': periods,
                'blsresult': blsresult,
                'stepsize': stepsize,
                'nfreq': nfreq,
                'nphasebins': nphasebins,
                'mintransitduration': mintransitduration,
                'maxtransitduration': maxtransitduration,
                'method': 'bls',
                'kwargs': {
                    'startp': startp,
                    'endp': endp,
                    'stepsize': stepsize,
                    'mintransitduration': mintransitduration,
                    'maxtransitduration': maxtransitduration,
                    'nphasebins': nphasebins,
                    'autofreq': autofreq,
                    'periodepsilon': periodepsilon,
                    'nbestpeaks': nbestpeaks,
                    'sigclip': sigclip
                }
            }

            return resultdict

        except Exception as e:

            LOGEXCEPTION('BLS failed!')
            return {
                'bestperiod': npnan,
                'bestlspval': npnan,
                'nbestpeaks': nbestpeaks,
                'nbestlspvals': None,
                'nbestperiods': None,
                'lspvals': None,
                'periods': None,
                'stepsize': stepsize,
                'nfreq': nfreq,
                'nphasebins': nphasebins,
                'mintransitduration': mintransitduration,
                'maxtransitduration': maxtransitduration,
                'method': 'bls',
                'kwargs': {
                    'startp': startp,
                    'endp': endp,
                    'stepsize': stepsize,
                    'mintransitduration': mintransitduration,
                    'maxtransitduration': maxtransitduration,
                    'nphasebins': nphasebins,
                    'autofreq': autofreq,
                    'periodepsilon': periodepsilon,
                    'nbestpeaks': nbestpeaks,
                    'sigclip': sigclip
                }
            }

    else:

        LOGERROR('no good detections for these times and mags, skipping...')
        return {
            'bestperiod': npnan,
            'bestlspval': npnan,
            'nbestpeaks': nbestpeaks,
            'nbestlspvals': None,
            'nbestperiods': None,
            'lspvals': None,
            'periods': None,
            'stepsize': stepsize,
            'nfreq': None,
            'nphasebins': None,
            'mintransitduration': mintransitduration,
            'maxtransitduration': maxtransitduration,
            'method': 'bls',
            'kwargs': {
                'startp': startp,
                'endp': endp,
                'stepsize': stepsize,
                'mintransitduration': mintransitduration,
                'maxtransitduration': maxtransitduration,
                'nphasebins': nphasebins,
                'autofreq': autofreq,
                'periodepsilon': periodepsilon,
                'nbestpeaks': nbestpeaks,
                'sigclip': sigclip
            }
        }
Exemple #6
0
def bls_parallel_pfind(
        times,
        mags,
        errs,
        magsarefluxes=False,
        startp=0.1,  # by default, search from 0.1 d to...
        endp=100.0,  # ... 100.0 d -- don't search full timebase
        stepsize=1.0e-4,
        mintransitduration=0.01,  # minimum transit length in phase
        maxtransitduration=0.8,  # maximum transit length in phase
        nphasebins=200,
        autofreq=True,  # figure out f0, nf, and df automatically
        nbestpeaks=5,
        periodepsilon=0.1,  # 0.1
        nworkers=None,
        sigclip=10.0,
        verbose=True):
    '''Runs the Box Least Squares Fitting Search for transit-shaped signals.

    Based on eebls.f from Kovacs et al. 2002 and python-bls from Foreman-Mackey
    et al. 2015. Breaks up the full frequency space into chunks and passes them
    to parallel BLS workers.

    NOTE: the combined BLS spectrum produced by this function is not identical
    to that produced by running BLS in one shot for the entire frequency
    space. There are differences on the order of 1.0e-3 or so in the respective
    peak values, but peaks appear at the same frequencies for both methods. This
    is likely due to different aliasing caused by smaller chunks of the
    frequency space used by the parallel workers in this function. When in
    doubt, confirm results for this parallel implementation by comparing to
    those from the serial implementation above.

    '''

    # get rid of nans first and sigclip
    stimes, smags, serrs = sigclip_magseries(times,
                                             mags,
                                             errs,
                                             magsarefluxes=magsarefluxes,
                                             sigclip=sigclip)

    # make sure there are enough points to calculate a spectrum
    if len(stimes) > 9 and len(smags) > 9 and len(serrs) > 9:

        # if we're setting up everything automatically
        if autofreq:

            # figure out the best number of phasebins to use
            nphasebins = int(np.ceil(2.0 / mintransitduration))

            # use heuristic to figure out best timestep
            stepsize = 0.25 * mintransitduration / (stimes.max() -
                                                    stimes.min())

            # now figure out the frequencies to use
            minfreq = 1.0 / endp
            maxfreq = 1.0 / startp
            nfreq = int(np.ceil((maxfreq - minfreq) / stepsize))

            # say what we're using
            if verbose:
                LOGINFO('min P: %s, max P: %s, nfreq: %s, '
                        'minfreq: %s, maxfreq: %s' %
                        (startp, endp, nfreq, minfreq, maxfreq))
                LOGINFO('autofreq = True: using AUTOMATIC values for '
                        'freq stepsize: %s, nphasebins: %s, '
                        'min transit duration: %s, max transit duration: %s' %
                        (stepsize, nphasebins, mintransitduration,
                         maxtransitduration))

        else:

            minfreq = 1.0 / endp
            maxfreq = 1.0 / startp
            nfreq = int(np.ceil((maxfreq - minfreq) / stepsize))

            # say what we're using
            if verbose:
                LOGINFO('min P: %s, max P: %s, nfreq: %s, '
                        'minfreq: %s, maxfreq: %s' %
                        (startp, endp, nfreq, minfreq, maxfreq))
                LOGINFO('autofreq = False: using PROVIDED values for '
                        'freq stepsize: %s, nphasebins: %s, '
                        'min transit duration: %s, max transit duration: %s' %
                        (stepsize, nphasebins, mintransitduration,
                         maxtransitduration))

        # check the minimum frequency
        if minfreq < (1.0 / (stimes.max() - stimes.min())):

            minfreq = 2.0 / (stimes.max() - stimes.min())
            if verbose:
                LOGWARNING('the requested max P = %.3f is larger than '
                           'the time base of the observations = %.3f, '
                           ' will make minfreq = 2 x 1/timebase' %
                           (endp, stimes.max() - stimes.min()))
                LOGINFO('new minfreq: %s, maxfreq: %s' % (minfreq, maxfreq))

        #############################
        ## NOW RUN BLS IN PARALLEL ##
        #############################

        # fix number of CPUs if needed
        if not nworkers or nworkers > NCPUS:
            nworkers = NCPUS
            if verbose:
                LOGINFO('using %s workers...' % nworkers)

        # break up the tasks into chunks
        frequencies = minfreq + nparange(nfreq) * stepsize

        csrem = int(fmod(nfreq, nworkers))
        csint = int(float(nfreq / nworkers))

        chunk_minfreqs, chunk_nfreqs = [], []

        for x in range(nworkers):

            this_minfreqs = frequencies[x * csint]

            # handle usual nfreqs
            if x < (nworkers - 1):
                this_nfreqs = frequencies[x * csint:x * csint + csint].size
            else:
                this_nfreqs = frequencies[x * csint:x * csint + csint +
                                          csrem].size

            chunk_minfreqs.append(this_minfreqs)
            chunk_nfreqs.append(this_nfreqs)

        # chunk_minfreqs = [frequencies[x*chunksize] for x in range(nworkers)]
        # chunk_nfreqs = [frequencies[x*chunksize:x*chunksize+chunksize].size
        #                 for x in range(nworkers)]

        # populate the tasks list
        tasks = [(stimes, smags, chunk_minf, chunk_nf, stepsize, nphasebins,
                  mintransitduration, maxtransitduration)
                 for (chunk_nf,
                      chunk_minf) in zip(chunk_minfreqs, chunk_nfreqs)]

        if verbose:
            for ind, task in enumerate(tasks):
                LOGINFO('worker %s: minfreq = %.6f, nfreqs = %s' %
                        (ind + 1, task[3], task[2]))
            LOGINFO('running...')

        # return tasks

        # start the pool
        pool = Pool(nworkers)
        results = pool.map(parallel_bls_worker, tasks)

        pool.close()
        pool.join()
        del pool

        # now concatenate the output lsp arrays
        lsp = np.concatenate([x['power'] for x in results])
        periods = 1.0 / frequencies

        # find the nbestpeaks for the periodogram: 1. sort the lsp array
        # by highest value first 2. go down the values until we find
        # five values that are separated by at least periodepsilon in
        # period

        # make sure to get only the finite peaks in the periodogram
        # this is needed because BLS may produce infs for some peaks
        finitepeakind = npisfinite(lsp)
        finlsp = lsp[finitepeakind]
        finperiods = periods[finitepeakind]

        # make sure that finlsp has finite values before we work on it
        try:

            bestperiodind = npargmax(finlsp)

        except ValueError:

            LOGERROR('no finite periodogram values '
                     'for this mag series, skipping...')
            return {
                'bestperiod': npnan,
                'bestlspval': npnan,
                'nbestpeaks': nbestpeaks,
                'nbestlspvals': None,
                'nbestperiods': None,
                'lspvals': None,
                'periods': None,
                'method': 'bls',
                'kwargs': {
                    'startp': startp,
                    'endp': endp,
                    'stepsize': stepsize,
                    'mintransitduration': mintransitduration,
                    'maxtransitduration': maxtransitduration,
                    'nphasebins': nphasebins,
                    'autofreq': autofreq,
                    'periodepsilon': periodepsilon,
                    'nbestpeaks': nbestpeaks,
                    'sigclip': sigclip
                }
            }

        sortedlspind = np.argsort(finlsp)[::-1]
        sortedlspperiods = finperiods[sortedlspind]
        sortedlspvals = finlsp[sortedlspind]

        prevbestlspval = sortedlspvals[0]

        # now get the nbestpeaks
        nbestperiods, nbestlspvals, peakcount = ([finperiods[bestperiodind]],
                                                 [finlsp[bestperiodind]], 1)
        prevperiod = sortedlspperiods[0]

        # find the best nbestpeaks in the lsp and their periods
        for period, lspval in zip(sortedlspperiods, sortedlspvals):

            if peakcount == nbestpeaks:
                break
            perioddiff = abs(period - prevperiod)
            bestperiodsdiff = [abs(period - x) for x in nbestperiods]

            # print('prevperiod = %s, thisperiod = %s, '
            #       'perioddiff = %s, peakcount = %s' %
            #       (prevperiod, period, perioddiff, peakcount))

            # this ensures that this period is different from the last
            # period and from all the other existing best periods by
            # periodepsilon to make sure we jump to an entire different
            # peak in the periodogram
            if (perioddiff > (periodepsilon * prevperiod)
                    and all(x > (periodepsilon * prevperiod)
                            for x in bestperiodsdiff)):
                nbestperiods.append(period)
                nbestlspvals.append(lspval)
                peakcount = peakcount + 1

            prevperiod = period

        # generate the return dict
        resultdict = {
            'bestperiod': finperiods[bestperiodind],
            'bestlspval': finlsp[bestperiodind],
            'nbestpeaks': nbestpeaks,
            'nbestlspvals': nbestlspvals,
            'nbestperiods': nbestperiods,
            'lspvals': lsp,
            'frequencies': frequencies,
            'periods': periods,
            'blsresult': results,
            'stepsize': stepsize,
            'nfreq': nfreq,
            'nphasebins': nphasebins,
            'mintransitduration': mintransitduration,
            'maxtransitduration': maxtransitduration,
            'method': 'bls',
            'kwargs': {
                'startp': startp,
                'endp': endp,
                'stepsize': stepsize,
                'mintransitduration': mintransitduration,
                'maxtransitduration': maxtransitduration,
                'nphasebins': nphasebins,
                'autofreq': autofreq,
                'periodepsilon': periodepsilon,
                'nbestpeaks': nbestpeaks,
                'sigclip': sigclip
            }
        }

        return resultdict

    else:

        LOGERROR('no good detections for these times and mags, skipping...')
        return {
            'bestperiod': npnan,
            'bestlspval': npnan,
            'nbestpeaks': nbestpeaks,
            'nbestlspvals': None,
            'nbestperiods': None,
            'lspvals': None,
            'periods': None,
            'blsresult': None,
            'stepsize': stepsize,
            'nfreq': None,
            'nphasebins': None,
            'mintransitduration': mintransitduration,
            'maxtransitduration': maxtransitduration,
            'method': 'bls',
            'kwargs': {
                'startp': startp,
                'endp': endp,
                'stepsize': stepsize,
                'mintransitduration': mintransitduration,
                'maxtransitduration': maxtransitduration,
                'nphasebins': nphasebins,
                'autofreq': autofreq,
                'periodepsilon': periodepsilon,
                'nbestpeaks': nbestpeaks,
                'sigclip': sigclip
            }
        }
Exemple #7
0
def simple_flare_find(times, mags, errs,
                      smoothbinsize=97,
                      flareminsigma=4.0,
                      flaremaxcadencediff=1,
                      flaremincadencepoints=3,
                      magsarefluxes=False,
                      savgolpolyorder=2,
                      **savgolkwargs):
    '''This finds flares in  time series using the method in Walkowicz+ 2011.

    Returns number of flares found, and their time indices.

    Args
    ----

    times, mags, errs are numpy arrays for the time series.

    Kwargs
    ------

    smoothbinsize: the number of consecutive light curve points to smooth over
    in the time series using a Savitsky-Golay filter. The smoothed light curve
    is then subtracted from the actual light curve to remove trends that
    potentially last smoothbinsize light curve points. The default value is
    chosen as ~6.5 hours (97 x 4 minute cadence for HATNet/HATSouth).

    flareminsigma: the minimum sigma above the median light curve level to
    designate points as belonging to possible flares

    flaremaxcadencediff: the maximum number of light curve points apart each
    possible flare event measurement is allowed to be. If this is 1, then we'll
    look for consecutive measurements.

    flaremincadencepoints: the minimum number of light curve points (each
    flaremaxcadencediff points apart) required that are at least flareminsigma
    above the median light curve level to call an event a flare.

    magsarefluxes: if True, indicates that mags is actually an array of fluxes.

    savgolpolyorder: the polynomial order of the function used by the
    Savitsky-Golay filter.

    Any remaining keyword arguments are passed directly to the savgol_filter
    function from scipy.

    '''

    # if no errs are given, assume 0.1% errors
    if errs is None:
        errs = 0.001*mags

    # get rid of nans first
    finiteind = npisfinite(times) & npisfinite(mags) & npisfinite(errs)
    ftimes = times[finiteind]
    fmags = mags[finiteind]
    ferrs = errs[finiteind]

    # now get the smoothed mag series using the filter
    # kwargs are provided to the savgol_filter function
    smoothed = savgol_filter(fmags,
                             smoothbinsize,
                             savgolpolyorder,
                             **savgolkwargs)
    subtracted = fmags - smoothed

    # calculate some stats
    # the series_median is ~zero after subtraction
    series_mad = npmedian(npabs(subtracted))
    series_stdev = 1.483*series_mad

    # find extreme positive deviations
    if magsarefluxes:
        extind = npwhere(subtracted > (minflaresigma*series_stdev))
    else:
        extind = npwhere(subtracted < (-minflaresigma*series_stdev))

    # see if there are any extrema
    if extind and extind[0]:

        extrema_indices = extind[0]
        flaregroups = []

        # find the deviations within the requested flaremaxcadencediff
        for ind, extrema_index in enumerate(extrema_indices):

            stuff_to_do()
Exemple #8
0
def bls_serial_pfind(
        times,
        mags,
        errs,
        magsarefluxes=False,
        startp=0.1,  # search from 0.1 d to...
        endp=100.0,  # ... 100.0 d -- don't search full timebase
        stepsize=5.0e-4,
        mintransitduration=0.01,  # minimum transit length in phase
        maxtransitduration=0.4,  # maximum transit length in phase
        ndurations=100,
        autofreq=True,  # figure out f0, nf, and df automatically
        blsobjective='likelihood',
        blsmethod='fast',
        blsoversample=10,
        blsmintransits=3,
        blsfreqfactor=10.0,
        periodepsilon=0.1,
        nbestpeaks=5,
        sigclip=10.0,
        endp_timebase_check=True,
        verbose=True,
        raiseonfail=False):
    '''Runs the Box Least Squares Fitting Search for transit-shaped signals.

    Based on the version of BLS in Astropy 3.1:
    `astropy.stats.BoxLeastSquares`. If you don't have Astropy 3.1, this module
    will fail to import. Note that by default, this implementation of
    `bls_serial_pfind` doesn't use the `.autoperiod()` function from
    `BoxLeastSquares` but uses the same auto frequency-grid generation as the
    functions in `periodbase.kbls`. If you want to use Astropy's implementation,
    set the value of `autofreq` kwarg to 'astropy'.

    The dict returned from this function contains a `blsmodel` key, which is the
    generated model from Astropy's BLS. Use the `.compute_stats()` method to
    calculate the required stats like SNR, depth, duration, etc.

    Parameters
    ----------

    times,mags,errs : np.array
        The magnitude/flux time-series to search for transits.

    magsarefluxes : bool
        If the input measurement values in `mags` and `errs` are in fluxes, set
        this to True.

    startp,endp : float
        The minimum and maximum periods to consider for the transit search.

    stepsize : float
        The step-size in frequency to use when constructing a frequency grid for
        the period search.

    mintransitduration,maxtransitduration : float
        The minimum and maximum transitdurations (in units of phase) to consider
        for the transit search.

    ndurations : int
        The number of transit durations to use in the period-search.

    autofreq : bool or str
        If this is True, the values of `stepsize` and `nphasebins` will be
        ignored, and these, along with a frequency-grid, will be determined
        based on the following relations::

            nphasebins = int(ceil(2.0/mintransitduration))
            if nphasebins > 3000:
                nphasebins = 3000

            stepsize = 0.25*mintransitduration/(times.max()-times.min())

            minfreq = 1.0/endp
            maxfreq = 1.0/startp
            nfreq = int(ceil((maxfreq - minfreq)/stepsize))

        If this is False, you must set `startp`, `endp`, and `stepsize` as
        appropriate.

        If this is str == 'astropy', will use the
        `astropy.stats.BoxLeastSquares.autoperiod()` function to calculate the
        frequency grid instead of the kbls method.

    blsobjective : {'likelihood','snr'}
        Sets the type of objective to optimize in the `BoxLeastSquares.power()`
        function.

    blsmethod : {'fast','slow'}
        Sets the type of method to use in the `BoxLeastSquares.power()`
        function.

    blsoversample : {'likelihood','snr'}
        Sets the `oversample` kwarg for the `BoxLeastSquares.power()` function.

    blsmintransits : int
        Sets the `min_n_transits` kwarg for the `BoxLeastSquares.autoperiod()`
        function.

    blsfreqfactor : float
        Sets the `frequency_factor` kwarg for the `BoxLeastSquares.autperiod()`
        function.

    periodepsilon : float
        The fractional difference between successive values of 'best' periods
        when sorting by periodogram power to consider them as separate periods
        (as opposed to part of the same periodogram peak). This is used to avoid
        broad peaks in the periodogram and make sure the 'best' periods returned
        are all actually independent.

    nbestpeaks : int
        The number of 'best' peaks to return from the periodogram results,
        starting from the global maximum of the periodogram peak values.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed using
        the number provided as the sigma-multiplier to cut out from the input
        time-series.

        If a list of two ints/floats is provided, the function will perform an
        'asymmetric' sigma-clip. The first element in this list is the sigma
        value to use for fainter flux/mag values; the second element in this
        list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    endp_timebase_check : bool
        If True, will check if the ``endp`` value is larger than the time-base
        of the observations. If it is, will change the ``endp`` value such that
        it is half of the time-base. If False, will allow an ``endp`` larger
        than the time-base of the observations.

    verbose : bool
        If this is True, will indicate progress and details about the frequency
        grid used for the period search.

    raiseonfail : bool
        If True, raises an exception if something goes wrong. Otherwise, returns
        None.

    Returns
    -------

    dict
        This function returns a dict, referred to as an `lspinfo` dict in other
        astrobase functions that operate on periodogram results. This is a
        standardized format across all astrobase period-finders, and is of the
        form below::

            {'bestperiod': the best period value in the periodogram,
             'bestlspval': the periodogram peak associated with the best period,
             'nbestpeaks': the input value of nbestpeaks,
             'nbestlspvals': nbestpeaks-size list of best period peak values,
             'nbestperiods': nbestpeaks-size list of best periods,
             'lspvals': the full array of periodogram powers,
             'frequencies': the full array of frequencies considered,
             'periods': the full array of periods considered,
             'durations': the array of durations used to run BLS,
             'blsresult': Astropy BLS result object (BoxLeastSquaresResult),
             'blsmodel': Astropy BLS BoxLeastSquares object used for work,
             'stepsize': the actual stepsize used,
             'nfreq': the actual nfreq used,
             'durations': the durations array used,
             'mintransitduration': the input mintransitduration,
             'maxtransitduration': the input maxtransitdurations,
             'method':'bls' -> the name of the period-finder method,
             'kwargs':{ dict of all of the input kwargs for record-keeping}}

    '''

    # get rid of nans first and sigclip
    stimes, smags, serrs = sigclip_magseries(times,
                                             mags,
                                             errs,
                                             magsarefluxes=magsarefluxes,
                                             sigclip=sigclip)

    # make sure there are enough points to calculate a spectrum
    if len(stimes) > 9 and len(smags) > 9 and len(serrs) > 9:

        # if we're setting up everything automatically
        if isinstance(autofreq, bool) and autofreq:

            # use heuristic to figure out best timestep
            stepsize = 0.25 * mintransitduration / (stimes.max() -
                                                    stimes.min())

            # now figure out the frequencies to use
            minfreq = 1.0 / endp
            maxfreq = 1.0 / startp
            nfreq = int(npceil((maxfreq - minfreq) / stepsize))

            # say what we're using
            if verbose:
                LOGINFO('min P: %s, max P: %s, nfreq: %s, '
                        'minfreq: %s, maxfreq: %s' %
                        (startp, endp, nfreq, minfreq, maxfreq))
                LOGINFO('autofreq = True: using AUTOMATIC values for '
                        'freq stepsize: %s, ndurations: %s, '
                        'min transit duration: %s, max transit duration: %s' %
                        (stepsize, ndurations, mintransitduration,
                         maxtransitduration))

            use_autoperiod = False

        elif isinstance(autofreq, bool) and not autofreq:

            minfreq = 1.0 / endp
            maxfreq = 1.0 / startp
            nfreq = int(npceil((maxfreq - minfreq) / stepsize))

            # say what we're using
            if verbose:
                LOGINFO('min P: %s, max P: %s, nfreq: %s, '
                        'minfreq: %s, maxfreq: %s' %
                        (startp, endp, nfreq, minfreq, maxfreq))
                LOGINFO('autofreq = False: using PROVIDED values for '
                        'freq stepsize: %s, ndurations: %s, '
                        'min transit duration: %s, max transit duration: %s' %
                        (stepsize, ndurations, mintransitduration,
                         maxtransitduration))

            use_autoperiod = False

        elif isinstance(autofreq, str) and autofreq == 'astropy':

            use_autoperiod = True
            minfreq = 1.0 / endp
            maxfreq = 1.0 / startp

        else:

            LOGERROR("unknown autofreq kwarg encountered. can't continue...")
            return None

        # check the minimum frequency
        if ((minfreq < (1.0 / (stimes.max() - stimes.min())))
                and endp_timebase_check):

            LOGWARNING('the requested max P = %.3f is larger than '
                       'the time base of the observations = %.3f, '
                       ' will make minfreq = 2 x 1/timebase' %
                       (endp, stimes.max() - stimes.min()))
            minfreq = 2.0 / (stimes.max() - stimes.min())
            LOGWARNING('new minfreq: %s, maxfreq: %s' % (minfreq, maxfreq))

        # run BLS
        try:

            # astropy's BLS requires durations in units of time
            durations = nplinspace(mintransitduration * startp,
                                   maxtransitduration * startp, ndurations)

            # set up the correct units for the BLS model
            if magsarefluxes:

                blsmodel = BoxLeastSquares(stimes * u.day,
                                           smags * u.dimensionless_unscaled,
                                           dy=serrs * u.dimensionless_unscaled)

            else:

                blsmodel = BoxLeastSquares(stimes * u.day,
                                           smags * u.mag,
                                           dy=serrs * u.mag)

            # use autoperiod if requested
            if use_autoperiod:
                periods = nparray(
                    blsmodel.autoperiod(durations,
                                        minimum_period=startp,
                                        maximum_period=endp,
                                        minimum_n_transit=blsmintransits,
                                        frequency_factor=blsfreqfactor))
                nfreq = periods.size

                if verbose:
                    LOGINFO("autofreq = 'astropy', used .autoperiod() with "
                            "minimum_n_transit = %s, freq_factor = %s "
                            "to generate the frequency grid" %
                            (blsmintransits, blsfreqfactor))
                    LOGINFO(
                        'stepsize = %.5f, nfreq = %s, minfreq = %.5f, '
                        'maxfreq = %.5f, ndurations = %s' %
                        (abs(1.0 / periods[1] - 1.0 / periods[0]), nfreq, 1.0 /
                         periods.max(), 1.0 / periods.min(), durations.size))

            # otherwise, use kbls method
            else:
                frequencies = minfreq + nparange(nfreq) * stepsize
                periods = 1.0 / frequencies

            if nfreq > 5.0e5:
                if verbose:
                    LOGWARNING('more than 5.0e5 frequencies to go through; '
                               'this will take a while. '
                               'you might want to use the '
                               'abls.bls_parallel_pfind function instead')

            # run the periodogram
            blsresult = blsmodel.power(periods * u.day,
                                       durations * u.day,
                                       objective=blsobjective,
                                       method=blsmethod,
                                       oversample=blsoversample)

            # get the peak values
            lsp = nparray(blsresult.power)

            # find the nbestpeaks for the periodogram: 1. sort the lsp array
            # by highest value first 2. go down the values until we find
            # five values that are separated by at least periodepsilon in
            # period
            # make sure to get only the finite peaks in the periodogram
            # this is needed because BLS may produce infs for some peaks
            finitepeakind = npisfinite(lsp)
            finlsp = lsp[finitepeakind]
            finperiods = periods[finitepeakind]

            # make sure that finlsp has finite values before we work on it
            try:

                bestperiodind = npargmax(finlsp)

            except ValueError:

                LOGERROR('no finite periodogram values '
                         'for this mag series, skipping...')

                return {
                    'bestperiod': npnan,
                    'bestlspval': npnan,
                    'nbestpeaks': nbestpeaks,
                    'nbestinds': None,
                    'nbestlspvals': None,
                    'nbestperiods': None,
                    'lspvals': None,
                    'periods': None,
                    'durations': None,
                    'method': 'bls',
                    'blsresult': None,
                    'blsmodel': None,
                    'kwargs': {
                        'startp': startp,
                        'endp': endp,
                        'stepsize': stepsize,
                        'mintransitduration': mintransitduration,
                        'maxtransitduration': maxtransitduration,
                        'ndurations': ndurations,
                        'blsobjective': blsobjective,
                        'blsmethod': blsmethod,
                        'blsoversample': blsoversample,
                        'blsntransits': blsmintransits,
                        'blsfreqfactor': blsfreqfactor,
                        'autofreq': autofreq,
                        'periodepsilon': periodepsilon,
                        'nbestpeaks': nbestpeaks,
                        'sigclip': sigclip,
                        'magsarefluxes': magsarefluxes
                    }
                }

            sortedlspind = npargsort(finlsp)[::-1]
            sortedlspperiods = finperiods[sortedlspind]
            sortedlspvals = finlsp[sortedlspind]

            # now get the nbestpeaks
            nbestperiods, nbestlspvals, nbestinds, peakcount = ([
                finperiods[bestperiodind]
            ], [finlsp[bestperiodind]], [bestperiodind], 1)
            prevperiod = sortedlspperiods[0]

            # find the best nbestpeaks in the lsp and their periods
            for period, lspval, ind in zip(sortedlspperiods, sortedlspvals,
                                           sortedlspind):

                if peakcount == nbestpeaks:
                    break
                perioddiff = abs(period - prevperiod)
                bestperiodsdiff = [abs(period - x) for x in nbestperiods]

                # print('prevperiod = %s, thisperiod = %s, '
                #       'perioddiff = %s, peakcount = %s' %
                #       (prevperiod, period, perioddiff, peakcount))

                # this ensures that this period is different from the last
                # period and from all the other existing best periods by
                # periodepsilon to make sure we jump to an entire different
                # peak in the periodogram
                if (perioddiff > (periodepsilon * prevperiod)
                        and all(x > (periodepsilon * period)
                                for x in bestperiodsdiff)):
                    nbestperiods.append(period)
                    nbestlspvals.append(lspval)
                    nbestinds.append(ind)
                    peakcount = peakcount + 1

                prevperiod = period

            # generate the return dict
            resultdict = {
                'bestperiod': finperiods[bestperiodind],
                'bestlspval': finlsp[bestperiodind],
                'nbestpeaks': nbestpeaks,
                'nbestinds': nbestinds,
                'nbestlspvals': nbestlspvals,
                'nbestperiods': nbestperiods,
                'lspvals': lsp,
                'frequencies': frequencies,
                'periods': periods,
                'durations': durations,
                'blsresult': blsresult,
                'blsmodel': blsmodel,
                'stepsize': stepsize,
                'nfreq': nfreq,
                'mintransitduration': mintransitduration,
                'maxtransitduration': maxtransitduration,
                'method': 'bls',
                'kwargs': {
                    'startp': startp,
                    'endp': endp,
                    'stepsize': stepsize,
                    'mintransitduration': mintransitduration,
                    'maxtransitduration': maxtransitduration,
                    'ndurations': ndurations,
                    'blsobjective': blsobjective,
                    'blsmethod': blsmethod,
                    'blsoversample': blsoversample,
                    'blsntransits': blsmintransits,
                    'blsfreqfactor': blsfreqfactor,
                    'autofreq': autofreq,
                    'periodepsilon': periodepsilon,
                    'nbestpeaks': nbestpeaks,
                    'sigclip': sigclip,
                    'magsarefluxes': magsarefluxes
                }
            }

            return resultdict

        except Exception as e:

            LOGEXCEPTION('BLS failed!')

            if raiseonfail:
                raise

            return {
                'bestperiod': npnan,
                'bestlspval': npnan,
                'nbestinds': None,
                'nbestpeaks': nbestpeaks,
                'nbestlspvals': None,
                'nbestperiods': None,
                'lspvals': None,
                'periods': None,
                'durations': None,
                'blsresult': None,
                'blsmodel': None,
                'stepsize': stepsize,
                'nfreq': nfreq,
                'mintransitduration': mintransitduration,
                'maxtransitduration': maxtransitduration,
                'method': 'bls',
                'kwargs': {
                    'startp': startp,
                    'endp': endp,
                    'stepsize': stepsize,
                    'mintransitduration': mintransitduration,
                    'maxtransitduration': maxtransitduration,
                    'ndurations': ndurations,
                    'blsobjective': blsobjective,
                    'blsmethod': blsmethod,
                    'blsoversample': blsoversample,
                    'blsntransits': blsmintransits,
                    'blsfreqfactor': blsfreqfactor,
                    'autofreq': autofreq,
                    'periodepsilon': periodepsilon,
                    'nbestpeaks': nbestpeaks,
                    'sigclip': sigclip,
                    'magsarefluxes': magsarefluxes
                }
            }

    else:

        LOGERROR('no good detections for these times and mags, skipping...')
        return {
            'bestperiod': npnan,
            'bestlspval': npnan,
            'nbestinds': None,
            'nbestpeaks': nbestpeaks,
            'nbestlspvals': None,
            'nbestperiods': None,
            'lspvals': None,
            'periods': None,
            'durations': None,
            'blsresult': None,
            'blsmodel': None,
            'stepsize': stepsize,
            'nfreq': None,
            'nphasebins': None,
            'mintransitduration': mintransitduration,
            'maxtransitduration': maxtransitduration,
            'method': 'bls',
            'kwargs': {
                'startp': startp,
                'endp': endp,
                'stepsize': stepsize,
                'mintransitduration': mintransitduration,
                'maxtransitduration': maxtransitduration,
                'ndurations': ndurations,
                'blsobjective': blsobjective,
                'blsmethod': blsmethod,
                'blsoversample': blsoversample,
                'blsntransits': blsmintransits,
                'blsfreqfactor': blsfreqfactor,
                'autofreq': autofreq,
                'periodepsilon': periodepsilon,
                'nbestpeaks': nbestpeaks,
                'sigclip': sigclip,
                'magsarefluxes': magsarefluxes
            }
        }
Exemple #9
0
def pdw_period_find(times,
                    mags,
                    errs,
                    autofreq=True,
                    init_p=None,
                    end_p=None,
                    f_step=1.0e-4,
                    phasebinsize=None,
                    sigclip=10.0,
                    nworkers=None,
                    verbose=False):
    '''This is the parallel version of the function above.

    Uses the string length method in Dworetsky 1983 to calculate the period of a
    time-series of magnitude measurements and associated magnitude errors. This
    can optionally bin in phase to try to speed up the calculation.

    PARAMETERS:

    time: series of times at which mags were measured (usually some form of JD)
    mag: timeseries of magnitudes (np.array)
    err: associated errs per magnitude measurement (np.array)
    init_p, end_p: interval to search for periods between (both ends inclusive)
    f_step: step in frequency [days^-1] to use

    RETURNS:

    tuple of the following form:

    (periods (np.array),
     string_lengths (np.array),
     good_period_mask (boolean array))

    '''

    # remove nans
    find = npisfinite(times) & npisfinite(mags) & npisfinite(errs)
    ftimes, fmags, ferrs = times[find], mags[find], errs[find]

    mod_mags = (fmags - npmin(fmags)) / (2.0 *
                                         (npmax(fmags) - npmin(fmags))) - 0.25

    if len(ftimes) > 9 and len(fmags) > 9 and len(ferrs) > 9:

        # get the median and stdev = 1.483 x MAD
        median_mag = np.median(fmags)
        stddev_mag = (np.median(np.abs(fmags - median_mag))) * 1.483

        # sigclip next
        if sigclip:

            sigind = (np.abs(fmags - median_mag)) < (sigclip * stddev_mag)

            stimes = ftimes[sigind]
            smags = fmags[sigind]
            serrs = ferrs[sigind]

            LOGINFO('sigclip = %s: before = %s observations, '
                    'after = %s observations' %
                    (sigclip, len(times), len(stimes)))

        else:

            stimes = ftimes
            smags = fmags
            serrs = ferrs

        # make sure there are enough points to calculate a spectrum
        if len(stimes) > 9 and len(smags) > 9 and len(serrs) > 9:

            # get the frequencies to use
            if init_p:
                endf = 1.0 / init_p
            else:
                # default start period is 0.1 day
                endf = 1.0 / 0.1

            if end_p:
                startf = 1.0 / end_p
            else:
                # default end period is length of time series
                startf = 1.0 / (stimes.max() - stimes.min())

            # if we're not using autofreq, then use the provided frequencies
            if not autofreq:
                frequencies = np.arange(startf, endf, stepsize)
                LOGINFO(
                    'using %s frequency points, start P = %.3f, end P = %.3f' %
                    (frequencies.size, 1.0 / endf, 1.0 / startf))
            else:
                # this gets an automatic grid of frequencies to use
                frequencies = get_frequency_grid(stimes,
                                                 minfreq=startf,
                                                 maxfreq=endf)
                LOGINFO('using autofreq with %s frequency points, '
                        'start P = %.3f, end P = %.3f' %
                        (frequencies.size, 1.0 / frequencies.max(),
                         1.0 / frequencies.min()))

            # set up some internal stuff
            fold_time = npmin(ftimes)  # fold at the first time element
            j_range = len(fmags) - 1
            epsilon = 2.0 * npmean(ferrs)
            delta_l = 0.34 * (epsilon - 0.5 *
                              (epsilon**2)) * (len(ftimes) -
                                               npsqrt(10.0 / epsilon))
            keep_threshold_1 = 1.6 + 1.2 * delta_l
            l = 0.212 * len(ftimes)
            sig_l = len(ftimes) / 37.5
            keep_threshold_2 = l + 4.0 * sig_l

            # generate the tasks
            tasks = [(x, ftimes, mod_mags, fold_time, j_range,
                      keep_threshold_1, keep_threshold_2, phasebinsize)
                     for x in frequencies]

            # fire up the pool and farm out the tasks
            if (not nworkers) or (nworkers > NCPUS):
                nworkers = NCPUS
                LOGINFO('using %s workers...' % nworkers)

            pool = Pool(nworkers)
            strlen_results = pool.map(pdw_worker, tasks)
            pool.close()
            pool.join()
            del pool

            periods, strlens, goodflags = zip(*strlen_results)
            periods, strlens, goodflags = (np.array(periods),
                                           np.array(strlens),
                                           np.array(goodflags))

            strlensort = npargsort(strlens)
            nbeststrlens = strlens[strlensort[:5]]
            nbestperiods = periods[strlensort[:5]]
            nbestflags = goodflags[strlensort[:5]]
            bestperiod = nbestperiods[0]
            beststrlen = nbeststrlens[0]
            bestflag = nbestflags[0]

            return {
                'bestperiod': bestperiod,
                'beststrlen': beststrlen,
                'bestflag': bestflag,
                'nbeststrlens': nbeststrlens,
                'nbestperiods': nbestperiods,
                'nbestflags': nbestflags,
                'strlens': strlens,
                'periods': periods,
                'goodflags': goodflags
            }

        else:

            LOGERROR(
                'no good detections for these times and mags, skipping...')
            return {
                'bestperiod': npnan,
                'beststrlen': npnan,
                'bestflag': npnan,
                'nbeststrlens': None,
                'nbestperiods': None,
                'nbestflags': None,
                'strlens': None,
                'periods': None,
                'goodflags': None
            }
    else:

        LOGERROR('no good detections for these times and mags, skipping...')
        return {
            'bestperiod': npnan,
            'beststrlen': npnan,
            'bestflag': npnan,
            'nbeststrlens': None,
            'nbestperiods': None,
            'nbestflags': None,
            'strlens': None,
            'periods': None,
            'goodflags': None
        }
Exemple #10
0
def sigclip_magseries(times,
                      mags,
                      errs,
                      sigclip=None,
                      iterative=False,
                      niterations=None,
                      meanormedian='median',
                      magsarefluxes=False):
    '''
    Select the finite times, magnitudes (or fluxes), and errors from the
    passed values, and apply symmetric or asymmetric sigma clipping to them.
    Returns sigma-clipped times, mags, and errs.

    Args:
        times (np.array): ...

        mags (np.array): numpy array to sigma-clip. Does not assume all values
        are finite. Does not assume anything about whether they're
        positive/negative.

        errs (np.array): ...

        iterative (bool): True if you want iterative sigma-clipping. If
        niterations is not set and this is True, sigma-clipping is iterated
        until no more points are removed.

        niterations (int): maximum number of iterations to perform for
        sigma-clipping. If None, the iterative arg takes precedence, and
        iterative=True will sigma-clip until no more points are removed.
        If niterations is not None and iterative is False, niterations takes
        precedence and iteration will occur.

        meanormedian (string): either 'mean' for sigma-clipping based on the
        mean value, or 'median' for sigma-clipping based on the median value.
        Default is 'median'.

        magsarefluxes (bool): True if your "mags" are in fact fluxes, i.e. if
        "dimming" corresponds to your "mags" getting smaller.

        sigclip (float or list): If float, apply symmetric sigma clipping. If
        list, e.g., [10., 3.], will sigclip out greater than 10-sigma dimmings
        and greater than 3-sigma brightenings. Here the meaning of "dimming"
        and "brightening" is set by *physics* (not the magnitude system), which
        is why the `magsarefluxes` kwarg must be correctly set.

    Returns:
        stimes, smags, serrs: (sigmaclipped values of each).
    '''

    returnerrs = True

    # fake the errors if they don't exist
    # this is inconsequential to sigma-clipping
    # we don't return these dummy values if the input errs are None
    if errs is None:
        # assume 0.1% errors if not given
        # this should work for mags and fluxes
        errs = 0.001 * mags
        returnerrs = False

    # filter the input times, mags, errs; do sigclipping and normalization
    find = npisfinite(times) & npisfinite(mags) & npisfinite(errs)
    ftimes, fmags, ferrs = times[find], mags[find], errs[find]

    # get the center value and stdev
    if meanormedian == 'median':  # stddev = 1.483 x MAD

        center_mag = npmedian(fmags)
        stddev_mag = (npmedian(npabs(fmags - center_mag))) * 1.483

    elif meanormedian == 'mean':

        center_mag = npmean(fmags)
        stddev_mag = npstddev(fmags)

    else:
        LOGWARNING("unrecognized meanormedian value given to "
                   "sigclip_magseries: %s, defaulting to 'median'" %
                   meanormedian)
        meanormedian = 'median'
        center_mag = npmedian(fmags)
        stddev_mag = (npmedian(npabs(fmags - center_mag))) * 1.483

    # sigclip next for a single sigclip value
    if sigclip and isinstance(sigclip, (float, int)):

        if not iterative and niterations is None:

            sigind = (npabs(fmags - center_mag)) < (sigclip * stddev_mag)

            stimes = ftimes[sigind]
            smags = fmags[sigind]
            serrs = ferrs[sigind]

        else:

            #
            # iterative version adapted from scipy.stats.sigmaclip
            #

            # First, if niterations is not set, iterate until covergence
            if niterations is None:

                delta = 1

                this_times = ftimes
                this_mags = fmags
                this_errs = ferrs

                while delta:

                    if meanormedian == 'mean':
                        this_center = npmean(this_mags)
                        this_stdev = npstddev(this_mags)
                    elif meanormedian == 'median':
                        this_center = npmedian(this_mags)
                        this_stdev = (npmedian(
                            npabs(this_mags - this_center))) * 1.483
                    this_size = this_mags.size

                    # apply the sigclip
                    tsi = ((npabs(this_mags - this_center)) <
                           (sigclip * this_stdev))

                    # update the arrays
                    this_times = this_times[tsi]
                    this_mags = this_mags[tsi]
                    this_errs = this_errs[tsi]

                    # update delta and go to the top of the loop
                    delta = this_size - this_mags.size

            else:  # If iterating only a certain number of times

                this_times = ftimes
                this_mags = fmags
                this_errs = ferrs

                iter_num = 0
                delta = 1
                while iter_num < niterations and delta:

                    if meanormedian == 'mean':

                        this_center = npmean(this_mags)
                        this_stdev = npstddev(this_mags)

                    elif meanormedian == 'median':

                        this_center = npmedian(this_mags)
                        this_stdev = (npmedian(
                            npabs(this_mags - this_center))) * 1.483
                    this_size = this_mags.size

                    # apply the sigclip
                    tsi = ((npabs(this_mags - this_center)) <
                           (sigclip * this_stdev))

                    # update the arrays
                    this_times = this_times[tsi]
                    this_mags = this_mags[tsi]
                    this_errs = this_errs[tsi]

                    # update the number of iterations and delta and
                    # go to the top of the loop
                    delta = this_size - this_mags.size
                    iter_num += 1

            # final sigclipped versions
            stimes, smags, serrs = this_times, this_mags, this_errs

    # this handles sigclipping for asymmetric +ve and -ve clip values
    elif sigclip and isinstance(sigclip, list) and len(sigclip) == 2:

        # sigclip is passed as [dimmingclip, brighteningclip]
        dimmingclip = sigclip[0]
        brighteningclip = sigclip[1]

        if not iterative and niterations is None:

            if magsarefluxes:
                nottoodimind = ((fmags - center_mag) >
                                (-dimmingclip * stddev_mag))
                nottoobrightind = ((fmags - center_mag) <
                                   (brighteningclip * stddev_mag))
            else:
                nottoodimind = ((fmags - center_mag) <
                                (dimmingclip * stddev_mag))
                nottoobrightind = ((fmags - center_mag) >
                                   (-brighteningclip * stddev_mag))

            sigind = nottoodimind & nottoobrightind

            stimes = ftimes[sigind]
            smags = fmags[sigind]
            serrs = ferrs[sigind]

        else:

            #
            # iterative version adapted from scipy.stats.sigmaclip
            #
            if niterations is None:

                delta = 1

                this_times = ftimes
                this_mags = fmags
                this_errs = ferrs

                while delta:

                    if meanormedian == 'mean':

                        this_center = npmean(this_mags)
                        this_stdev = npstddev(this_mags)

                    elif meanormedian == 'median':
                        this_center = npmedian(this_mags)
                        this_stdev = (npmedian(
                            npabs(this_mags - this_center))) * 1.483
                    this_size = this_mags.size

                    if magsarefluxes:
                        nottoodimind = ((this_mags - this_center) >
                                        (-dimmingclip * this_stdev))
                        nottoobrightind = ((this_mags - this_center) <
                                           (brighteningclip * this_stdev))
                    else:
                        nottoodimind = ((this_mags - this_center) <
                                        (dimmingclip * this_stdev))
                        nottoobrightind = ((this_mags - this_center) >
                                           (-brighteningclip * this_stdev))

                    # apply the sigclip
                    tsi = nottoodimind & nottoobrightind

                    # update the arrays
                    this_times = this_times[tsi]
                    this_mags = this_mags[tsi]
                    this_errs = this_errs[tsi]

                    # update delta and go to top of the loop
                    delta = this_size - this_mags.size

            else:  # If iterating only a certain number of times
                this_times = ftimes
                this_mags = fmags
                this_errs = ferrs

                iter_num = 0
                delta = 1

                while iter_num < niterations and delta:

                    if meanormedian == 'mean':
                        this_center = npmean(this_mags)
                        this_stdev = npstddev(this_mags)
                    elif meanormedian == 'median':
                        this_center = npmedian(this_mags)
                        this_stdev = (npmedian(
                            npabs(this_mags - this_center))) * 1.483
                    this_size = this_mags.size

                    if magsarefluxes:
                        nottoodimind = ((this_mags - this_center) >
                                        (-dimmingclip * this_stdev))
                        nottoobrightind = ((this_mags - this_center) <
                                           (brighteningclip * this_stdev))
                    else:
                        nottoodimind = ((this_mags - this_center) <
                                        (dimmingclip * this_stdev))
                        nottoobrightind = ((this_mags - this_center) >
                                           (-brighteningclip * this_stdev))

                    # apply the sigclip
                    tsi = nottoodimind & nottoobrightind

                    # update the arrays
                    this_times = this_times[tsi]
                    this_mags = this_mags[tsi]
                    this_errs = this_errs[tsi]

                    # update the number of iterations and delta
                    # and go to top of the loop
                    delta = this_size - this_mags.size
                    iter_num += 1

            # final sigclipped versions
            stimes, smags, serrs = this_times, this_mags, this_errs

    else:

        stimes = ftimes
        smags = fmags
        serrs = ferrs

    if returnerrs:
        return stimes, smags, serrs
    else:
        return stimes, smags, None
Exemple #11
0
def gilliland_cdpp(
        times,
        mags,
        errs,
        windowlength=97,
        polyorder=2,
        binsize=23400,  # in seconds: 6.5 hours for classic CDPP
        sigclip=5.0,
        magsarefluxes=False,
        **kwargs):
    '''This calculates the CDPP of a timeseries using the method in the paper:

    Gilliland, R. L., Chaplin, W. J., Dunham, E. W., et al. 2011, ApJS, 197, 6
    http://adsabs.harvard.edu/abs/2011ApJS..197....6G

    use magsarefluxes to indicate if the values in the mags array are actually
    flux values.

    The steps are:

    - pass the time-series through a Savitsky-Golay filter

      - we use scipy.signal.savgol_filter, **kwargs are passed to this

      - also see: http://scipy.github.io/old-wiki/pages/Cookbook/SavitzkyGolay

      - the windowlength is the number of LC points to use (Kepler uses 2 days =
        (1440 minutes/day / 30 minutes/LC point) x 2 days = 96 -> 97 LC points)

      - the polyorder is a quadratic by default

    - subtract the smoothed time-series from the actual light curve

    - sigma clip the remaining LC

    - get the binned mag series by averaging over 6.5 hour bins, only retaining
      bins with at least 7 points

    - the standard deviation of the binned averages is the CDPP

    - multiply this by 1.168 to correct for over-subtraction of white-noise

    '''

    # if no errs are given, assume 0.1% errors
    if errs is None:
        errs = 0.001 * mags

    # get rid of nans first
    find = npisfinite(times) & npisfinite(mags) & npisfinite(errs)
    ftimes = times[find]
    fmags = mags[find]
    ferrs = errs[find]

    if ftimes.size < (3 * windowlength):
        LOGERROR('not enough LC points to calculate CDPP')
        return npnan

    # now get the smoothed mag series using the filter
    # kwargs are provided to the savgol_filter function
    smoothed = savgol_filter(fmags, windowlength, polyorder, **kwargs)
    subtracted = fmags - smoothed

    # sigclip the subtracted light curve
    stimes, smags, serrs = sigclip_magseries(ftimes,
                                             fmags,
                                             ferrs,
                                             magsarefluxes=magsarefluxes)

    # bin over 6.5 hour bins and throw away all bins with less than 7 elements
    binned = time_bin_magseries_with_errs(stimes,
                                          smags,
                                          serrs,
                                          binsize=binsize,
                                          minbinelems=7)

    btimes, bmags, berrs = (binned['binnedtimes'], binned['binnedmags'],
                            binned['binnederrs'])

    # stdev of bin mags x 1.168 -> CDPP
    cdpp = npstd(bmags) * 1.168

    return cdpp
Exemple #12
0
def gilliland_cdpp(
        times,
        mags,
        errs,
        windowlength=97,
        polyorder=2,
        binsize=23400,  # in seconds: 6.5 hours for classic CDPP
        sigclip=5.0,
        magsarefluxes=False,
        **kwargs):
    '''This calculates the CDPP of a timeseries using the method in the paper:

    Gilliland, R. L., Chaplin, W. J., Dunham, E. W., et al. 2011, ApJS, 197, 6
    (http://adsabs.harvard.edu/abs/2011ApJS..197....6G)

    The steps are:

    - pass the time-series through a Savitsky-Golay filter.

      - we use `scipy.signal.savgol_filter`, `**kwargs` are passed to this.

      - also see: http://scipy.github.io/old-wiki/pages/Cookbook/SavitzkyGolay.

      - the `windowlength` is the number of LC points to use (Kepler uses 2 days
        = (1440 minutes/day / 30 minutes/LC point) x 2 days = 96 -> 97 LC
        points).

      - the `polyorder` is a quadratic by default.


    - subtract the smoothed time-series from the actual light curve.

    - sigma clip the remaining LC.

    - get the binned mag series by averaging over 6.5 hour bins, only retaining
      bins with at least 7 points.

    - the standard deviation of the binned averages is the CDPP.

    - multiply this by 1.168 to correct for over-subtraction of white-noise.


    Parameters
    ----------

    times,mags,errs : np.array
        The input mag/flux time-series to calculate CDPP for.

    windowlength : int
        The smoothing window size to use.

    polyorder : int
        The polynomial order to use in the Savitsky-Golay smoothing.

    binsize : int
        The bin size to use for binning the light curve.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed using
        the number provided as the sigma-multiplier to cut out from the input
        time-series.

        If a list of two ints/floats is provided, the function will perform an
        'asymmetric' sigma-clip. The first element in this list is the sigma
        value to use for fainter flux/mag values; the second element in this
        list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    magsarefluxes : bool
        If True, indicates the input time-series is fluxes and not mags.

    kwargs : additional kwargs
        These are passed directly to `scipy.signal.savgol_filter`.

    Returns
    -------

    float
        The calculated CDPP value.

    '''

    # if no errs are given, assume 0.1% errors
    if errs is None:
        errs = 0.001 * mags

    # get rid of nans first
    find = npisfinite(times) & npisfinite(mags) & npisfinite(errs)
    ftimes = times[find]
    fmags = mags[find]
    ferrs = errs[find]

    if ftimes.size < (3 * windowlength):
        LOGERROR('not enough LC points to calculate CDPP')
        return npnan

    # now get the smoothed mag series using the filter
    # kwargs are provided to the savgol_filter function
    smoothed = savgol_filter(fmags, windowlength, polyorder, **kwargs)
    subtracted = fmags - smoothed

    # sigclip the subtracted light curve
    stimes, smags, serrs = sigclip_magseries(ftimes,
                                             subtracted,
                                             ferrs,
                                             magsarefluxes=magsarefluxes)

    # bin over 6.5 hour bins and throw away all bins with less than 7 elements
    binned = time_bin_magseries_with_errs(stimes,
                                          smags,
                                          serrs,
                                          binsize=binsize,
                                          minbinelems=7)
    bmags = binned['binnedmags']

    # stdev of bin mags x 1.168 -> CDPP
    cdpp = npstd(bmags) * 1.168

    return cdpp
Exemple #13
0
def plot_mag_series(times,
                    mags,
                    errs=None,
                    outfile=None,
                    sigclip=30.0,
                    timebin=None,
                    yrange=None):
    '''This plots a magnitude time series.

    If outfile is none, then plots to matplotlib interactive window. If outfile
    is a string denoting a filename, uses that to write a png/eps/pdf figure.

    timebin is either a float indicating binsize in seconds, or None indicating
    no time-binning is required.

    '''

    if errs is not None:

        # remove nans
        find = npisfinite(times) & npisfinite(mags) & npisfinite(errs)
        ftimes, fmags, ferrs = times[find], mags[find], errs[find]

        # get the median and stdev = 1.483 x MAD
        median_mag = npmedian(fmags)
        stddev_mag = (npmedian(npabs(fmags - median_mag))) * 1.483

        # sigclip next
        if sigclip:

            sigind = (npabs(fmags - median_mag)) < (sigclip * stddev_mag)

            stimes = ftimes[sigind]
            smags = fmags[sigind]
            serrs = ferrs[sigind]

            LOGINFO('sigclip = %s: before = %s observations, '
                    'after = %s observations' %
                    (sigclip, len(times), len(stimes)))

        else:

            stimes = ftimes
            smags = fmags
            serrs = ferrs

    else:

        # remove nans
        find = npisfinite(times) & npisfinite(mags)
        ftimes, fmags, ferrs = times[find], mags[find], None

        # get the median and stdev = 1.483 x MAD
        median_mag = npmedian(fmags)
        stddev_mag = (npmedian(npabs(fmags - median_mag))) * 1.483

        # sigclip next
        if sigclip:

            sigind = (npabs(fmags - median_mag)) < (sigclip * stddev_mag)

            stimes = ftimes[sigind]
            smags = fmags[sigind]
            serrs = None

            LOGINFO('sigclip = %s: before = %s observations, '
                    'after = %s observations' %
                    (sigclip, len(times), len(stimes)))

        else:

            stimes = ftimes
            smags = fmags
            serrs = None

    # now we proceed to binning
    if timebin and errs is not None:

        binned = time_bin_magseries_with_errs(stimes, smags, serrs,
                                              binsize=timebin)
        btimes, bmags, berrs = (binned['binnedtimes'],
                                binned['binnedmags'],
                                binned['binnederrs'])

    elif timebin and errs is None:

        binned = time_bin_magseries(stimes, smags,
                                    binsize=timebin)
        btimes, bmags, berrs = binned['binnedtimes'], binned['binnedmags'], None

    else:

        btimes, bmags, berrs = stimes, smags, serrs


    # finally, proceed with plotting
    fig = plt.figure()
    fig.set_size_inches(7.5,4.8)

    plt.errorbar(btimes, bmags, fmt='go', yerr=berrs,
                 markersize=2.0, markeredgewidth=0.0, ecolor='grey',
                 capsize=0)

    # make a grid
    plt.grid(color='#a9a9a9',
             alpha=0.9,
             zorder=0,
             linewidth=1.0,
             linestyle=':')

    # fix the ticks to use no offsets
    plt.gca().get_yaxis().get_major_formatter().set_useOffset(False)
    plt.gca().get_xaxis().get_major_formatter().set_useOffset(False)

    # get the yrange
    if yrange and isinstance(yrange,list) and len(yrange) == 2:
        ymin, ymax = yrange
    else:
        ymin, ymax = plt.ylim()
    plt.ylim(ymax,ymin)

    plt.xlim(npmin(btimes) - 0.001*npmin(btimes),
             npmax(btimes) + 0.001*npmin(btimes))

    plt.xlabel('time [JD]')
    plt.ylabel('magnitude')

    if outfile and isinstance(outfile, str):

        plt.savefig(outfile,bbox_inches='tight')
        plt.close()
        return os.path.abspath(outfile)

    else:

        plt.show()
        plt.close()
        return
Exemple #14
0
def plot_phased_mag_series(times,
                           mags,
                           period,
                           errs=None,
                           epoch='min',
                           outfile=None,
                           sigclip=30.0,
                           phasewrap=True,
                           phasesort=True,
                           phasebin=None,
                           plotphaselim=[-0.8,0.8],
                           yrange=None):
    '''This plots a phased magnitude time series using the period provided.

    If epoch is None, uses the min(times) as the epoch.

    If epoch is a string 'min', then fits a cubic spline to the phased light
    curve using min(times), finds the magnitude minimum from the fitted light
    curve, then uses the corresponding time value as the epoch.

    If epoch is a float, then uses that directly to phase the light curve and as
    the epoch of the phased mag series plot.

    If outfile is none, then plots to matplotlib interactive window. If outfile
    is a string denoting a filename, uses that to write a png/eps/pdf figure.

    '''

    if errs is not None:

        # remove nans
        find = npisfinite(times) & npisfinite(mags) & npisfinite(errs)
        ftimes, fmags, ferrs = times[find], mags[find], errs[find]

        # get the median and stdev = 1.483 x MAD
        median_mag = npmedian(fmags)
        stddev_mag = (npmedian(npabs(fmags - median_mag))) * 1.483

        # sigclip next
        if sigclip:

            sigind = (npabs(fmags - median_mag)) < (sigclip * stddev_mag)

            stimes = ftimes[sigind]
            smags = fmags[sigind]
            serrs = ferrs[sigind]

            LOGINFO('sigclip = %s: before = %s observations, '
                    'after = %s observations' %
                    (sigclip, len(times), len(stimes)))

        else:

            stimes = ftimes
            smags = fmags
            serrs = ferrs

    else:

        # remove nans
        find = npisfinite(times) & npisfinite(mags)
        ftimes, fmags, ferrs = times[find], mags[find], None

        # get the median and stdev = 1.483 x MAD
        median_mag = npmedian(fmags)
        stddev_mag = (npmedian(npabs(fmags - median_mag))) * 1.483

        # sigclip next
        if sigclip:

            sigind = (npabs(fmags - median_mag)) < (sigclip * stddev_mag)

            stimes = ftimes[sigind]
            smags = fmags[sigind]
            serrs = None

            LOGINFO('sigclip = %s: before = %s observations, '
                    'after = %s observations' %
                    (sigclip, len(times), len(stimes)))

        else:

            stimes = ftimes
            smags = fmags
            serrs = None


    # figure out the epoch, if it's None, use the min of the time
    if epoch is None:

        epoch = npmin(stimes)

    # if the epoch is 'min', then fit a spline to the light curve phased
    # using the min of the time, find the fit mag minimum and use the time for
    # that as the epoch
    elif isinstance(epoch,str) and epoch == 'min':

        spfit = spline_fit_magseries(stimes, smags, serrs, period)
        epoch = spfit['fitepoch']


    # now phase (and optionally, phase bin the light curve)
    if errs is not None:

        # phase the magseries
        phasedlc = phase_magseries_with_errs(stimes,
                                             smags,
                                             serrs,
                                             period,
                                             epoch,
                                             wrap=phasewrap,
                                             sort=phasesort)
        plotphase = phasedlc['phase']
        plotmags = phasedlc['mags']
        ploterrs = phasedlc['errs']

        # if we're supposed to bin the phases, do so
        if phasebin:

            binphasedlc = phase_bin_magseries_with_errs(plotphase,
                                                        plotmags,
                                                        ploterrs,
                                                        binsize=phasebin)
            plotphase = binphasedlc['binnedphases']
            plotmags = binphasedlc['binnedmags']
            ploterrs = binphasedlc['binnederrs']

    else:

        # phase the magseries
        phasedlc = phase_magseries(stimes,
                                   smags,
                                   period,
                                   epoch,
                                   wrap=phasewrap,
                                   sort=phasesort)
        plotphase = phasedlc['phase']
        plotmags = phasedlc['mags']
        ploterrs = None

        # if we're supposed to bin the phases, do so
        if phasebin:

            binphasedlc = phase_bin_magseries(plotphase,
                                              plotmags,
                                              binsize=phasebin)
            plotphase = binphasedlc['binnedphases']
            plotmags = binphasedlc['binnedmags']
            ploterrs = None


    # finally, make the plots

    # initialize the plot
    fig = plt.figure()
    fig.set_size_inches(7.5,4.8)

    plt.errorbar(plotphase, plotmags, fmt='bo', yerr=ploterrs,
                 markersize=2.0, markeredgewidth=0.0, ecolor='#B2BEB5',
                 capsize=0)

    # make a grid
    plt.grid(color='#a9a9a9',
             alpha=0.9,
             zorder=0,
             linewidth=1.0,
             linestyle=':')

    # make lines for phase 0.0, 0.5, and -0.5
    plt.axvline(0.0,alpha=0.9,linestyle='dashed',color='g')
    plt.axvline(-0.5,alpha=0.9,linestyle='dashed',color='g')
    plt.axvline(0.5,alpha=0.9,linestyle='dashed',color='g')

    # fix the ticks to use no offsets
    plt.gca().get_yaxis().get_major_formatter().set_useOffset(False)
    plt.gca().get_xaxis().get_major_formatter().set_useOffset(False)

    # get the yrange
    if yrange and isinstance(yrange,list) and len(yrange) == 2:
        ymin, ymax = yrange
    else:
        ymin, ymax = plt.ylim()
    plt.ylim(ymax,ymin)

    # set the x axis limit
    if not plotphaselim:
        plot_xlim = plt.xlim()
        plt.xlim((npmin(plotphase)-0.1,
                  npmax(plotphase)+0.1))
    else:
        plt.xlim((plotphaselim[0],plotphaselim[1]))

    # set up the labels
    plt.xlabel('phase')
    plt.ylabel('magnitude')
    plt.title('using period: %.6f d and epoch: %.6f' % (period, epoch))

    # make the figure
    if outfile and isinstance(outfile, str):

        plt.savefig(outfile,bbox_inches='tight')
        plt.close()
        return os.path.abspath(outfile)

    else:

        plt.show()
        plt.close()
        return
Exemple #15
0
def tls_parallel_pfind(
        times,
        mags,
        errs,
        magsarefluxes=None,
        startp=0.1,  # search from 0.1 d to...
        endp=None,  # determine automatically from times
        tls_oversample=5,
        tls_mintransits=3,
        tls_transit_template='default',
        tls_rstar_min=0.13,
        tls_rstar_max=3.5,
        tls_mstar_min=0.1,
        tls_mstar_max=2.0,
        periodepsilon=0.1,
        nbestpeaks=5,
        sigclip=10.0,
        verbose=True,
        nworkers=None):
    """Wrapper to Hippke & Heller (2019)'s "transit least squares", which is BLS,
    but with a slightly better template (and niceties in the implementation).

    A few comments:

    * The time series must be in units of days.

    * The frequency sampling Hippke & Heller (2019) advocate for is cubic in
      frequencies, instead of linear. Ofir (2014) found that the
      linear-in-frequency sampling (which is correct for sinusoidal signal
      detection) isn't optimal for a Keplerian box signal. He gave an equation
      for "optimal" sampling. `tlsoversample` is the factor by which to
      oversample over that. The grid can be imported independently via::

        from transitleastsquares import period_grid

      The spacing equations are given here:
      https://transitleastsquares.readthedocs.io/en/latest/Python%20interface.html#period-grid

    * The boundaries of the period search are by default 0.1 day to 99% the
      baseline of times.

    Parameters
    ----------

    times,mags,errs : np.array
        The magnitude/flux time-series to search for transits.

    magsarefluxes : bool
        `transitleastsquares` requires fluxes. Therefore if magsarefluxes is
        set to false, the passed mags are converted to fluxes. All output
        dictionary vectors include fluxes, not mags.

    startp,endp : float
        The minimum and maximum periods to consider for the transit search.

    tls_oversample : int
        Factor by which to oversample the frequency grid.

    tls_mintransits : int
        Sets the `min_n_transits` kwarg for the `BoxLeastSquares.autoperiod()`
        function.

    tls_transit_template: str
        `default`, `grazing`, or `box`.

    tls_rstar_min,tls_rstar_max : float
        The range of stellar radii to consider when generating a frequency
        grid. In uniits of Rsun.

    tls_mstar_min,tls_mstar_max : float
        The range of stellar masses to consider when generating a frequency
        grid. In units of Msun.

    periodepsilon : float
        The fractional difference between successive values of 'best' periods
        when sorting by periodogram power to consider them as separate periods
        (as opposed to part of the same periodogram peak). This is used to avoid
        broad peaks in the periodogram and make sure the 'best' periods returned
        are all actually independent.

    nbestpeaks : int
        The number of 'best' peaks to return from the periodogram results,
        starting from the global maximum of the periodogram peak values.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed using
        the number provided as the sigma-multiplier to cut out from the input
        time-series.

        If a list of two ints/floats is provided, the function will perform an
        'asymmetric' sigma-clip. The first element in this list is the sigma
        value to use for fainter flux/mag values; the second element in this
        list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    verbose : bool
        Kept for consistency with `periodbase` functions.

    nworkers : int or None
        The number of parallel workers to launch for period-search. If None,
        nworkers = NCPUS.

    Returns
    -------

    dict
        This function returns a dict, referred to as an `lspinfo` dict in other
        astrobase functions that operate on periodogram results. The format is
        similar to the other astrobase period-finders -- it contains the
        nbestpeaks, which is the most important thing. (But isn't entirely
        standardized.)

        Crucially, it also contains "tlsresult", which is a dictionary with
        transitleastsquares spectra (used to get the SDE as defined in the TLS
        paper), statistics, transit period, mid-time, duration, depth, SNR, and
        the "odd_even_mismatch" statistic. The full key list is::

            dict_keys(['SDE', 'SDE_raw', 'chi2_min', 'chi2red_min', 'period',
            'period_uncertainty', 'T0', 'duration', 'depth', 'depth_mean',
            'depth_mean_even', 'depth_mean_odd', 'transit_depths',
            'transit_depths_uncertainties', 'rp_rs', 'snr', 'snr_per_transit',
            'snr_pink_per_transit', 'odd_even_mismatch', 'transit_times',
            'per_transit_count', 'transit_count', 'distinct_transit_count',
            'empty_transit_count', 'FAP', 'in_transit_count',
            'after_transit_count', 'before_transit_count', 'periods',
            'power', 'power_raw', 'SR', 'chi2',
            'chi2red', 'model_lightcurve_time', 'model_lightcurve_model',
            'model_folded_phase', 'folded_y', 'folded_dy', 'folded_phase',
            'model_folded_model'])

        The descriptions are here:

        https://transitleastsquares.readthedocs.io/en/latest/Python%20interface.html#return-values

        The remaining resultdict is::

            resultdict = {
                'tlsresult':tlsresult,
                'bestperiod': the best period value in the periodogram,
                'bestlspval': the peak associated with the best period,
                'nbestpeaks': the input value of nbestpeaks,
                'nbestlspvals': nbestpeaks-size list of best period peak values,
                'nbestperiods': nbestpeaks-size list of best periods,
                'lspvals': the full array of periodogram powers,
                'periods': the full array of periods considered,
                'tlsresult': Astropy tls result object (BoxLeastSquaresResult),
                'tlsmodel': Astropy tls BoxLeastSquares object used for work,
                'method':'tls' -> the name of the period-finder method,
                'kwargs':{ dict of all of the input kwargs for record-keeping}
            }
    """

    # set NCPUS for HTLS
    if nworkers is None:
        nworkers = NCPUS

    # convert mags to fluxes because this method requires them
    if not magsarefluxes:

        LOGWARNING('transitleastsquares requires relative flux...')
        LOGWARNING('converting input mags to relative flux...')
        LOGWARNING('and forcing magsarefluxes=True...')

        mag_0, f_0 = 12.0, 1.0e4
        flux = f_0 * 10.0**(-0.4 * (mags - mag_0))
        flux /= np.nanmedian(flux)

        # if the errors are provided as mag errors, convert them to flux
        if errs is not None:
            flux_errs = flux * (errs / mags)
        else:
            flux_errs = None

        mags = flux
        errs = flux_errs

        magsarefluxes = True

    # uniform weights for errors if none given
    if errs is None:
        errs = np.ones_like(mags) * 1.0e-4

    # get rid of nans first and sigclip
    stimes, smags, serrs = sigclip_magseries(times,
                                             mags,
                                             errs,
                                             magsarefluxes=magsarefluxes,
                                             sigclip=sigclip)

    stimes, smags, serrs = resort_by_time(stimes, smags, serrs)

    # make sure there are enough points to calculate a spectrum
    if not (len(stimes) > 9 and len(smags) > 9 and len(serrs) > 9):

        LOGERROR('no good detections for these times and mags, skipping...')
        resultdict = {
            'tlsresult': npnan,
            'bestperiod': npnan,
            'bestlspval': npnan,
            'nbestpeaks': nbestpeaks,
            'nbestinds': None,
            'nbestlspvals': None,
            'nbestperiods': None,
            'lspvals': None,
            'periods': None,
            'method': 'tls',
            'kwargs': {
                'startp': startp,
                'endp': endp,
                'tls_oversample': tls_oversample,
                'tls_ntransits': tls_mintransits,
                'tls_transit_template': tls_transit_template,
                'tls_rstar_min': tls_rstar_min,
                'tls_rstar_max': tls_rstar_max,
                'tls_mstar_min': tls_mstar_min,
                'tls_mstar_max': tls_mstar_max,
                'periodepsilon': periodepsilon,
                'nbestpeaks': nbestpeaks,
                'sigclip': sigclip,
                'magsarefluxes': magsarefluxes
            }
        }
        return resultdict

    # if the end period is not provided, set it to
    # 99% of the time baseline. (for two transits).
    if endp is None:
        endp = 0.99 * (np.nanmax(stimes) - np.nanmin(stimes))

    # run periodogram
    model = transitleastsquares(stimes, smags, serrs)
    tlsresult = model.power(use_threads=nworkers,
                            show_progress_bar=False,
                            R_star_min=tls_rstar_min,
                            R_star_max=tls_rstar_max,
                            M_star_min=tls_mstar_min,
                            M_star_max=tls_mstar_max,
                            period_min=startp,
                            period_max=endp,
                            n_transits_min=tls_mintransits,
                            transit_template=tls_transit_template,
                            oversampling_factor=tls_oversample)

    # get the peak values
    lsp = nparray(tlsresult.power)
    periods = nparray(tlsresult.periods)

    # find the nbestpeaks for the periodogram: 1. sort the lsp array by highest
    # value first 2. go down the values until we find five values that are
    # separated by at least periodepsilon in period make sure to get only the
    # finite peaks in the periodogram this is needed because tls may produce
    # infs for some peaks
    finitepeakind = npisfinite(lsp)
    finlsp = lsp[finitepeakind]
    finperiods = periods[finitepeakind]

    # make sure that finlsp has finite values before we work on it
    try:

        bestperiodind = npargmax(finlsp)

    except ValueError:

        LOGERROR('no finite periodogram values '
                 'for this mag series, skipping...')
        resultdict = {
            'tlsresult': npnan,
            'bestperiod': npnan,
            'bestlspval': npnan,
            'nbestpeaks': nbestpeaks,
            'nbestinds': None,
            'nbestlspvals': None,
            'nbestperiods': None,
            'lspvals': None,
            'periods': None,
            'method': 'tls',
            'kwargs': {
                'startp': startp,
                'endp': endp,
                'tls_oversample': tls_oversample,
                'tls_ntransits': tls_mintransits,
                'tls_transit_template': tls_transit_template,
                'tls_rstar_min': tls_rstar_min,
                'tls_rstar_max': tls_rstar_max,
                'tls_mstar_min': tls_mstar_min,
                'tls_mstar_max': tls_mstar_max,
                'periodepsilon': periodepsilon,
                'nbestpeaks': nbestpeaks,
                'sigclip': sigclip,
                'magsarefluxes': magsarefluxes
            }
        }
        return resultdict

    sortedlspind = npargsort(finlsp)[::-1]
    sortedlspperiods = finperiods[sortedlspind]
    sortedlspvals = finlsp[sortedlspind]

    # now get the nbestpeaks
    nbestperiods, nbestlspvals, nbestinds, peakcount = ([
        finperiods[bestperiodind]
    ], [finlsp[bestperiodind]], [bestperiodind], 1)
    prevperiod = sortedlspperiods[0]

    # find the best nbestpeaks in the lsp and their periods
    for period, lspval, ind in zip(sortedlspperiods, sortedlspvals,
                                   sortedlspind):

        if peakcount == nbestpeaks:
            break
        perioddiff = abs(period - prevperiod)
        bestperiodsdiff = [abs(period - x) for x in nbestperiods]

        # this ensures that this period is different from the last
        # period and from all the other existing best periods by
        # periodepsilon to make sure we jump to an entire different
        # peak in the periodogram
        if (perioddiff > (periodepsilon * prevperiod)
                and all(x > (periodepsilon * period)
                        for x in bestperiodsdiff)):
            nbestperiods.append(period)
            nbestlspvals.append(lspval)
            nbestinds.append(ind)
            peakcount = peakcount + 1

        prevperiod = period

    # generate the return dict
    resultdict = {
        'tlsresult': tlsresult,
        'bestperiod': finperiods[bestperiodind],
        'bestlspval': finlsp[bestperiodind],
        'nbestpeaks': nbestpeaks,
        'nbestinds': nbestinds,
        'nbestlspvals': nbestlspvals,
        'nbestperiods': nbestperiods,
        'lspvals': lsp,
        'periods': periods,
        'method': 'tls',
        'kwargs': {
            'startp': startp,
            'endp': endp,
            'tls_oversample': tls_oversample,
            'tls_ntransits': tls_mintransits,
            'tls_transit_template': tls_transit_template,
            'tls_rstar_min': tls_rstar_min,
            'tls_rstar_max': tls_rstar_max,
            'tls_mstar_min': tls_mstar_min,
            'tls_mstar_max': tls_mstar_max,
            'periodepsilon': periodepsilon,
            'nbestpeaks': nbestpeaks,
            'sigclip': sigclip,
            'magsarefluxes': magsarefluxes
        }
    }

    return resultdict
Exemple #16
0
def specwindow_lsp(times,
                   mags,
                   errs,
                   magsarefluxes=False,
                   startp=None,
                   endp=None,
                   stepsize=1.0e-4,
                   autofreq=True,
                   nbestpeaks=5,
                   periodepsilon=0.1,
                   sigclip=10.0,
                   nworkers=None,
                   glspfunc=_glsp_worker_specwindow,
                   verbose=True):
    '''This calculates the spectral window function.

    Wraps the `pgen_lsp` function above to use the specific worker for
    calculating the window-function.

    Parameters
    ----------

    times,mags,errs : np.array
        The mag/flux time-series with associated measurement errors to run the
        period-finding on.

    magsarefluxes : bool
        If the input measurement values in `mags` and `errs` are in fluxes, set
        this to True.

    startp,endp : float or None
        The minimum and maximum periods to consider for the transit search.

    stepsize : float
        The step-size in frequency to use when constructing a frequency grid for
        the period search.

    autofreq : bool
        If this is True, the value of `stepsize` will be ignored and the
        :py:func:`astrobase.periodbase.get_frequency_grid` function will be used
        to generate a frequency grid based on `startp`, and `endp`. If these are
        None as well, `startp` will be set to 0.1 and `endp` will be set to
        `times.max() - times.min()`.

    nbestpeaks : int
        The number of 'best' peaks to return from the periodogram results,
        starting from the global maximum of the periodogram peak values.

    periodepsilon : float
        The fractional difference between successive values of 'best' periods
        when sorting by periodogram power to consider them as separate periods
        (as opposed to part of the same periodogram peak). This is used to avoid
        broad peaks in the periodogram and make sure the 'best' periods returned
        are all actually independent.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed using
        the number provided as the sigma-multiplier to cut out from the input
        time-series.

        If a list of two ints/floats is provided, the function will perform an
        'asymmetric' sigma-clip. The first element in this list is the sigma
        value to use for fainter flux/mag values; the second element in this
        list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    nworkers : int
        The number of parallel workers to use when calculating the periodogram.

    glspfunc : Python function
        The worker function to use to calculate the periodogram. This is used to
        used to make the `pgen_lsp` function calculate the time-series sampling
        window function instead of the time-series measurements' GLS periodogram
        by passing in `_glsp_worker_specwindow` instead of the default
        `_glsp_worker` function.

    verbose : bool
        If this is True, will indicate progress and details about the frequency
        grid used for the period search.

    Returns
    -------

    dict
        This function returns a dict, referred to as an `lspinfo` dict in other
        astrobase functions that operate on periodogram results. This is a
        standardized format across all astrobase period-finders, and is of the
        form below::

            {'bestperiod': the best period value in the periodogram,
             'bestlspval': the periodogram peak associated with the best period,
             'nbestpeaks': the input value of nbestpeaks,
             'nbestlspvals': nbestpeaks-size list of best period peak values,
             'nbestperiods': nbestpeaks-size list of best periods,
             'lspvals': the full array of periodogram powers,
             'periods': the full array of periods considered,
             'method':'win' -> the name of the period-finder method,
             'kwargs':{ dict of all of the input kwargs for record-keeping}}

    '''

    # run the LSP using glsp_worker_specwindow as the worker
    lspres = pgen_lsp(times,
                      mags,
                      errs,
                      magsarefluxes=magsarefluxes,
                      startp=startp,
                      endp=endp,
                      autofreq=autofreq,
                      nbestpeaks=nbestpeaks,
                      periodepsilon=periodepsilon,
                      stepsize=stepsize,
                      nworkers=nworkers,
                      sigclip=sigclip,
                      glspfunc=glspfunc,
                      verbose=verbose)

    # update the resultdict to indicate we're a spectral window function
    lspres['method'] = 'win'

    if lspres['lspvals'] is not None:

        # renormalize the periodogram to between 0 and 1 like the usual GLS.
        lspmax = npnanmax(lspres['lspvals'])

        if npisfinite(lspmax):

            lspres['lspvals'] = lspres['lspvals'] / lspmax
            lspres['nbestlspvals'] = [
                x / lspmax for x in lspres['nbestlspvals']
            ]
            lspres['bestlspval'] = lspres['bestlspval'] / lspmax

    return lspres
Exemple #17
0
def aovhm_periodfind(
        times,
        mags,
        errs,
        nharmonics=6,
        magsarefluxes=False,
        autofreq=True,
        startp=None,
        endp=None,
        normalize=True,
        stepsize=1.0e-4,
        nbestpeaks=5,
        periodepsilon=0.1,  # 0.1
        sigclip=10.0,
        nworkers=None,
        verbose=True):
    '''This runs a parallel AoV period search.

    NOTE: normalize = True here as recommended by Schwarzenberg-Czerny 1996,
    i.e. mags will be normalized to zero and rescaled so their variance = 1.0

    '''

    # get rid of nans first and sigclip
    stimes, smags, serrs = sigclip_magseries(times,
                                             mags,
                                             errs,
                                             magsarefluxes=magsarefluxes,
                                             sigclip=sigclip)

    # make sure there are enough points to calculate a spectrum
    if len(stimes) > 9 and len(smags) > 9 and len(serrs) > 9:

        # get the frequencies to use
        if startp:
            endf = 1.0 / startp
        else:
            # default start period is 0.1 day
            endf = 1.0 / 0.1

        if endp:
            startf = 1.0 / endp
        else:
            # default end period is length of time series
            startf = 1.0 / (stimes.max() - stimes.min())

        # if we're not using autofreq, then use the provided frequencies
        if not autofreq:
            frequencies = np.arange(startf, endf, stepsize)
            if verbose:
                LOGINFO(
                    'using %s frequency points, start P = %.3f, end P = %.3f' %
                    (frequencies.size, 1.0 / endf, 1.0 / startf))
        else:
            # this gets an automatic grid of frequencies to use
            frequencies = get_frequency_grid(stimes,
                                             minfreq=startf,
                                             maxfreq=endf)
            if verbose:
                LOGINFO('using autofreq with %s frequency points, '
                        'start P = %.3f, end P = %.3f' %
                        (frequencies.size, 1.0 / frequencies.max(),
                         1.0 / frequencies.min()))

        # map to parallel workers
        if (not nworkers) or (nworkers > NCPUS):
            nworkers = NCPUS
            if verbose:
                LOGINFO('using %s workers...' % nworkers)

        pool = Pool(nworkers)

        # renormalize the working mags to zero and scale them so that the
        # variance = 1 for use with our LSP functions
        if normalize:
            nmags = (smags - npmedian(smags)) / npstd(smags)
        else:
            nmags = smags

        # figure out the weighted variance
        # www.itl.nist.gov/div898/software/dataplot/refman2/ch2/weighvar.pdf
        magvariance_top = npsum(nmags / (serrs * serrs))
        magvariance_bot = (nmags.size - 1) * npsum(
            1.0 / (serrs * serrs)) / nmags.size
        magvariance = magvariance_top / magvariance_bot

        tasks = [(stimes, nmags, serrs, x, nharmonics, magvariance)
                 for x in frequencies]

        lsp = pool.map(aovhm_theta_worker, tasks)

        pool.close()
        pool.join()
        del pool

        lsp = nparray(lsp)
        periods = 1.0 / frequencies

        # find the nbestpeaks for the periodogram: 1. sort the lsp array by
        # highest value first 2. go down the values until we find five
        # values that are separated by at least periodepsilon in period

        # make sure to filter out non-finite values
        finitepeakind = npisfinite(lsp)
        finlsp = lsp[finitepeakind]
        finperiods = periods[finitepeakind]

        # make sure that finlsp has finite values before we work on it
        try:

            bestperiodind = npargmax(finlsp)

        except ValueError:

            LOGERROR('no finite periodogram values '
                     'for this mag series, skipping...')
            return {
                'bestperiod': npnan,
                'bestlspval': npnan,
                'nbestpeaks': nbestpeaks,
                'nbestlspvals': None,
                'nbestperiods': None,
                'lspvals': None,
                'periods': None,
                'method': 'mav',
                'kwargs': {
                    'startp': startp,
                    'endp': endp,
                    'stepsize': stepsize,
                    'normalize': normalize,
                    'nharmonics': nharmonics,
                    'autofreq': autofreq,
                    'periodepsilon': periodepsilon,
                    'nbestpeaks': nbestpeaks,
                    'sigclip': sigclip
                }
            }

        sortedlspind = np.argsort(finlsp)[::-1]
        sortedlspperiods = finperiods[sortedlspind]
        sortedlspvals = finlsp[sortedlspind]

        prevbestlspval = sortedlspvals[0]
        # now get the nbestpeaks
        nbestperiods, nbestlspvals, peakcount = ([finperiods[bestperiodind]],
                                                 [finlsp[bestperiodind]], 1)
        prevperiod = sortedlspperiods[0]

        # find the best nbestpeaks in the lsp and their periods
        for period, lspval in zip(sortedlspperiods, sortedlspvals):

            if peakcount == nbestpeaks:
                break
            perioddiff = abs(period - prevperiod)
            bestperiodsdiff = [abs(period - x) for x in nbestperiods]

            # print('prevperiod = %s, thisperiod = %s, '
            #       'perioddiff = %s, peakcount = %s' %
            #       (prevperiod, period, perioddiff, peakcount))

            # this ensures that this period is different from the last
            # period and from all the other existing best periods by
            # periodepsilon to make sure we jump to an entire different peak
            # in the periodogram
            if (perioddiff > (periodepsilon * prevperiod)
                    and all(x > (periodepsilon * prevperiod)
                            for x in bestperiodsdiff)):
                nbestperiods.append(period)
                nbestlspvals.append(lspval)
                peakcount = peakcount + 1

            prevperiod = period

        return {
            'bestperiod': finperiods[bestperiodind],
            'bestlspval': finlsp[bestperiodind],
            'nbestpeaks': nbestpeaks,
            'nbestlspvals': nbestlspvals,
            'nbestperiods': nbestperiods,
            'lspvals': lsp,
            'periods': periods,
            'method': 'mav',
            'kwargs': {
                'startp': startp,
                'endp': endp,
                'stepsize': stepsize,
                'normalize': normalize,
                'nharmonics': nharmonics,
                'autofreq': autofreq,
                'periodepsilon': periodepsilon,
                'nbestpeaks': nbestpeaks,
                'sigclip': sigclip
            }
        }

    else:

        LOGERROR('no good detections for these times and mags, skipping...')
        return {
            'bestperiod': npnan,
            'bestlspval': npnan,
            'nbestpeaks': nbestpeaks,
            'nbestlspvals': None,
            'nbestperiods': None,
            'lspvals': None,
            'periods': None,
            'method': 'mav',
            'kwargs': {
                'startp': startp,
                'endp': endp,
                'stepsize': stepsize,
                'normalize': normalize,
                'nharmonics': nharmonics,
                'autofreq': autofreq,
                'periodepsilon': periodepsilon,
                'nbestpeaks': nbestpeaks,
                'sigclip': sigclip
            }
        }
Exemple #18
0
def pgen_lsp(
        times,
        mags,
        errs,
        magsarefluxes=False,
        startp=None,
        endp=None,
        autofreq=True,
        nbestpeaks=5,
        periodepsilon=0.1,  # 0.1
        stepsize=1.0e-4,
        nworkers=None,
        workchunksize=None,
        sigclip=10.0,
        glspfunc=glsp_worker,
        verbose=True):
    '''This calculates the generalized LSP given times, mags, errors.

    Uses the algorithm from Zechmeister and Kurster (2009). By default, this
    calculates a frequency grid to use automatically, based on the autofrequency
    function from astropy.stats.lombscargle. If startp and endp are provided,
    will generate a frequency grid based on these instead.

    '''

    # get rid of nans first and sigclip
    stimes, smags, serrs = sigclip_magseries(times,
                                             mags,
                                             errs,
                                             magsarefluxes=magsarefluxes,
                                             sigclip=sigclip)

    # get rid of zero errs
    nzind = np.nonzero(serrs)
    stimes, smags, serrs = stimes[nzind], smags[nzind], serrs[nzind]

    # make sure there are enough points to calculate a spectrum
    if len(stimes) > 9 and len(smags) > 9 and len(serrs) > 9:

        # get the frequencies to use
        if startp:
            endf = 1.0 / startp
        else:
            # default start period is 0.1 day
            endf = 1.0 / 0.1

        if endp:
            startf = 1.0 / endp
        else:
            # default end period is length of time series
            startf = 1.0 / (stimes.max() - stimes.min())

        # if we're not using autofreq, then use the provided frequencies
        if not autofreq:
            omegas = 2 * np.pi * np.arange(startf, endf, stepsize)
            if verbose:
                LOGINFO(
                    'using %s frequency points, start P = %.3f, end P = %.3f' %
                    (omegas.size, 1.0 / endf, 1.0 / startf))
        else:
            # this gets an automatic grid of frequencies to use
            freqs = get_frequency_grid(stimes, minfreq=startf, maxfreq=endf)
            omegas = 2 * np.pi * freqs
            if verbose:
                LOGINFO('using autofreq with %s frequency points, '
                        'start P = %.3f, end P = %.3f' %
                        (omegas.size, 1.0 / freqs.max(), 1.0 / freqs.min()))

        # map to parallel workers
        if (not nworkers) or (nworkers > NCPUS):
            nworkers = NCPUS
            if verbose:
                LOGINFO('using %s workers...' % nworkers)

        pool = Pool(nworkers)

        tasks = [(stimes, smags, serrs, x) for x in omegas]
        if workchunksize:
            lsp = pool.map(glspfunc, tasks, chunksize=workchunksize)
        else:
            lsp = pool.map(glspfunc, tasks)

        pool.close()
        pool.join()
        del pool

        lsp = np.array(lsp)
        periods = 2.0 * np.pi / omegas

        # find the nbestpeaks for the periodogram: 1. sort the lsp array by
        # highest value first 2. go down the values until we find five
        # values that are separated by at least periodepsilon in period

        # make sure to filter out non-finite values of lsp

        finitepeakind = npisfinite(lsp)
        finlsp = lsp[finitepeakind]
        finperiods = periods[finitepeakind]

        # make sure that finlsp has finite values before we work on it
        try:

            bestperiodind = npargmax(finlsp)

        except ValueError:

            LOGERROR('no finite periodogram values '
                     'for this mag series, skipping...')
            return {
                'bestperiod': npnan,
                'bestlspval': npnan,
                'nbestpeaks': nbestpeaks,
                'nbestlspvals': None,
                'nbestperiods': None,
                'lspvals': None,
                'omegas': omegas,
                'periods': None,
                'method': 'gls',
                'kwargs': {
                    'startp': startp,
                    'endp': endp,
                    'stepsize': stepsize,
                    'autofreq': autofreq,
                    'periodepsilon': periodepsilon,
                    'nbestpeaks': nbestpeaks,
                    'sigclip': sigclip
                }
            }

        sortedlspind = np.argsort(finlsp)[::-1]
        sortedlspperiods = finperiods[sortedlspind]
        sortedlspvals = finlsp[sortedlspind]

        prevbestlspval = sortedlspvals[0]

        # now get the nbestpeaks
        nbestperiods, nbestlspvals, peakcount = ([finperiods[bestperiodind]],
                                                 [finlsp[bestperiodind]], 1)
        prevperiod = sortedlspperiods[0]

        # find the best nbestpeaks in the lsp and their periods
        for period, lspval in zip(sortedlspperiods, sortedlspvals):

            if peakcount == nbestpeaks:
                break
            perioddiff = abs(period - prevperiod)
            bestperiodsdiff = [abs(period - x) for x in nbestperiods]

            # print('prevperiod = %s, thisperiod = %s, '
            #       'perioddiff = %s, peakcount = %s' %
            #       (prevperiod, period, perioddiff, peakcount))

            # this ensures that this period is different from the last
            # period and from all the other existing best periods by
            # periodepsilon to make sure we jump to an entire different peak
            # in the periodogram
            if (perioddiff > (periodepsilon * prevperiod)
                    and all(x > (periodepsilon * prevperiod)
                            for x in bestperiodsdiff)):
                nbestperiods.append(period)
                nbestlspvals.append(lspval)
                peakcount = peakcount + 1

            prevperiod = period

        return {
            'bestperiod': finperiods[bestperiodind],
            'bestlspval': finlsp[bestperiodind],
            'nbestpeaks': nbestpeaks,
            'nbestlspvals': nbestlspvals,
            'nbestperiods': nbestperiods,
            'lspvals': lsp,
            'omegas': omegas,
            'periods': periods,
            'method': 'gls',
            'kwargs': {
                'startp': startp,
                'endp': endp,
                'stepsize': stepsize,
                'autofreq': autofreq,
                'periodepsilon': periodepsilon,
                'nbestpeaks': nbestpeaks,
                'sigclip': sigclip
            }
        }

    else:

        LOGERROR('no good detections for these times and mags, skipping...')
        return {
            'bestperiod': npnan,
            'bestlspval': npnan,
            'nbestpeaks': nbestpeaks,
            'nbestlspvals': None,
            'nbestperiods': None,
            'lspvals': None,
            'omegas': None,
            'periods': None,
            'method': 'gls',
            'kwargs': {
                'startp': startp,
                'endp': endp,
                'stepsize': stepsize,
                'autofreq': autofreq,
                'periodepsilon': periodepsilon,
                'nbestpeaks': nbestpeaks,
                'sigclip': sigclip
            }
        }
Exemple #19
0
def stellingwerf_pdm(times,
                     mags,
                     errs,
                     magsarefluxes=False,
                     autofreq=True,
                     startp=None,
                     endp=None,
                     normalize=False,
                     stepsize=1.0e-4,
                     phasebinsize=0.05,
                     mindetperbin=9,
                     nbestpeaks=5,
                     periodepsilon=0.1, # 0.1
                     sigclip=10.0,
                     nworkers=None,
                     verbose=True):
    '''This runs a parallel Stellingwerf PDM period search.

    '''

    # get rid of nans first and sigclip
    stimes, smags, serrs = sigclip_magseries(times,
                                             mags,
                                             errs,
                                             magsarefluxes=magsarefluxes,
                                             sigclip=sigclip)

    # make sure there are enough points to calculate a spectrum
    if len(stimes) > 9 and len(smags) > 9 and len(serrs) > 9:

        # get the frequencies to use
        if startp:
            endf = 1.0/startp
        else:
            # default start period is 0.1 day
            endf = 1.0/0.1

        if endp:
            startf = 1.0/endp
        else:
            # default end period is length of time series
            startf = 1.0/(stimes.max() - stimes.min())

        # if we're not using autofreq, then use the provided frequencies
        if not autofreq:
            frequencies = np.arange(startf, endf, stepsize)
            if verbose:
                LOGINFO(
                    'using %s frequency points, start P = %.3f, end P = %.3f' %
                    (frequencies.size, 1.0/endf, 1.0/startf)
                )
        else:
            # this gets an automatic grid of frequencies to use
            frequencies = get_frequency_grid(stimes,
                                             minfreq=startf,
                                             maxfreq=endf)
            if verbose:
                LOGINFO(
                    'using autofreq with %s frequency points, '
                    'start P = %.3f, end P = %.3f' %
                    (frequencies.size,
                     1.0/frequencies.max(),
                     1.0/frequencies.min())
                )

        # map to parallel workers
        if (not nworkers) or (nworkers > NCPUS):
            nworkers = NCPUS
            if verbose:
                LOGINFO('using %s workers...' % nworkers)

        pool = Pool(nworkers)

        # renormalize the working mags to zero and scale them so that the
        # variance = 1 for use with our LSP functions
        if normalize:
            nmags = (smags - npmedian(smags))/npstd(smags)
        else:
            nmags = smags

        tasks = [(stimes, nmags, serrs, x, phasebinsize, mindetperbin)
                 for x in frequencies]

        lsp = pool.map(stellingwerf_pdm_worker, tasks)

        pool.close()
        pool.join()
        del pool

        lsp = nparray(lsp)
        periods = 1.0/frequencies

        # find the nbestpeaks for the periodogram: 1. sort the lsp array by
        # lowest value first 2. go down the values until we find five values
        # that are separated by at least periodepsilon in period

        # make sure to filter out the non-finite values of lsp
        finitepeakind = npisfinite(lsp)
        finlsp = lsp[finitepeakind]
        finperiods = periods[finitepeakind]

        # finlsp might not have any finite values if the period finding
        # failed. if so, argmin will return a ValueError.
        try:

            bestperiodind = npargmin(finlsp)

        except ValueError:

            LOGERROR('no finite periodogram values for '
                     'this mag series, skipping...')
            return {'bestperiod':npnan,
                    'bestlspval':npnan,
                    'nbestpeaks':nbestpeaks,
                    'nbestlspvals':None,
                    'nbestperiods':None,
                    'lspvals':None,
                    'periods':None,
                    'method':'pdm',
                    'kwargs':{'startp':startp,
                              'endp':endp,
                              'stepsize':stepsize,
                              'normalize':normalize,
                              'phasebinsize':phasebinsize,
                              'mindetperbin':mindetperbin,
                              'autofreq':autofreq,
                              'periodepsilon':periodepsilon,
                              'nbestpeaks':nbestpeaks,
                              'sigclip':sigclip}}

        sortedlspind = np.argsort(finlsp)
        sortedlspperiods = finperiods[sortedlspind]
        sortedlspvals = finlsp[sortedlspind]

        prevbestlspval = sortedlspvals[0]

        # now get the nbestpeaks
        nbestperiods, nbestlspvals, peakcount = (
            [finperiods[bestperiodind]],
            [finlsp[bestperiodind]],
            1
        )
        prevperiod = sortedlspperiods[0]

        # find the best nbestpeaks in the lsp and their periods
        for period, lspval in zip(sortedlspperiods, sortedlspvals):

            if peakcount == nbestpeaks:
                break
            perioddiff = abs(period - prevperiod)
            bestperiodsdiff = [abs(period - x) for x in nbestperiods]

            # print('prevperiod = %s, thisperiod = %s, '
            #       'perioddiff = %s, peakcount = %s' %
            #       (prevperiod, period, perioddiff, peakcount))

            # this ensures that this period is different from the last
            # period and from all the other existing best periods by
            # periodepsilon to make sure we jump to an entire different peak
            # in the periodogram
            if (perioddiff > (periodepsilon*prevperiod) and
                all(x > (periodepsilon*prevperiod) for x in bestperiodsdiff)):
                nbestperiods.append(period)
                nbestlspvals.append(lspval)
                peakcount = peakcount + 1

            prevperiod = period


        return {'bestperiod':finperiods[bestperiodind],
                'bestlspval':finlsp[bestperiodind],
                'nbestpeaks':nbestpeaks,
                'nbestlspvals':nbestlspvals,
                'nbestperiods':nbestperiods,
                'lspvals':lsp,
                'periods':periods,
                'method':'pdm',
                'kwargs':{'startp':startp,
                          'endp':endp,
                          'stepsize':stepsize,
                          'normalize':normalize,
                          'phasebinsize':phasebinsize,
                          'mindetperbin':mindetperbin,
                          'autofreq':autofreq,
                          'periodepsilon':periodepsilon,
                          'nbestpeaks':nbestpeaks,
                          'sigclip':sigclip}}

    else:

        LOGERROR('no good detections for these times and mags, skipping...')
        return {'bestperiod':npnan,
                'bestlspval':npnan,
                'nbestpeaks':nbestpeaks,
                'nbestlspvals':None,
                'nbestperiods':None,
                'lspvals':None,
                'periods':None,
                'method':'pdm',
                'kwargs':{'startp':startp,
                          'endp':endp,
                          'stepsize':stepsize,
                          'normalize':normalize,
                          'phasebinsize':phasebinsize,
                          'mindetperbin':mindetperbin,
                          'autofreq':autofreq,
                          'periodepsilon':periodepsilon,
                          'nbestpeaks':nbestpeaks,
                          'sigclip':sigclip}}
Exemple #20
0
def scipylsp_parallel(
        times,
        mags,
        errs,  # ignored but for consistent API
        startp,
        endp,
        nbestpeaks=5,
        periodepsilon=0.1,  # 0.1
        stepsize=1.0e-4,
        nworkers=4,
        sigclip=None,
        timebin=None):
    '''
    This uses the LSP function from the scipy library, which is fast as hell. We
    try to make it faster by running LSP for sections of the omegas array in
    parallel.

    '''

    # make sure there are no nans anywhere
    finiteind = np.isfinite(mags) & np.isfinite(errs)
    ftimes, fmags, ferrs = times[finiteind], mags[finiteind], errs[finiteind]

    if len(ftimes) > 0 and len(fmags) > 0:

        # sigclip the lightcurve if asked to do so
        if sigclip:
            worktimes, workmags, _ = sigclip_magseries(ftimes,
                                                       fmags,
                                                       ferrs,
                                                       sigclip=sigclip)
            LOGINFO('ndet after sigclipping = %s' % len(worktimes))

        else:
            worktimes = ftimes
            workmags = fmags

        # bin the lightcurve if asked to do so
        if timebin:

            binned = time_bin_magseries(worktimes, workmags, binsize=timebin)
            worktimes = binned['binnedtimes']
            workmags = binned['binnedmags']

        # renormalize the working mags to zero and scale them so that the
        # variance = 1 for use with our LSP functions
        normmags = (workmags - np.median(workmags)) / np.std(workmags)

        startf = 1.0 / endp
        endf = 1.0 / startp
        omegas = 2 * np.pi * np.arange(startf, endf, stepsize)

        # partition the omegas array by nworkers
        tasks = []
        chunksize = int(float(len(omegas)) / nworkers) + 1
        tasks = [
            omegas[x * chunksize:x * chunksize + chunksize]
            for x in range(nworkers)
        ]

        # map to parallel workers
        if (not nworkers) or (nworkers > NCPUS):
            nworkers = NCPUS
            LOGINFO('using %s workers...' % nworkers)

        pool = Pool(nworkers)

        tasks = [(worktimes, normmags, x) for x in tasks]
        lsp = pool.map(parallel_scipylsp_worker, tasks)

        pool.close()
        pool.join()

        lsp = np.concatenate(lsp)
        periods = 2.0 * np.pi / omegas

        # find the nbestpeaks for the periodogram: 1. sort the lsp array by
        # highest value first 2. go down the values until we find five values
        # that are separated by at least periodepsilon in period

        # make sure we only get finite lsp values
        finitepeakind = npisfinite(lsp)
        finlsp = lsp[finitepeakind]
        finperiods = periods[finitepeakind]

        bestperiodind = npargmax(finlsp)

        sortedlspind = np.argsort(finlsp)[::-1]
        sortedlspperiods = finperiods[sortedlspind]
        sortedlspvals = finlsp[sortedlspind]

        prevbestlspval = sortedlspvals[0]
        # now get the nbestpeaks
        nbestperiods, nbestlspvals, peakcount = ([finperiods[bestperiodind]],
                                                 [finlsp[bestperiodind]], 1)
        prevperiod = sortedlspperiods[0]

        # find the best nbestpeaks in the lsp and their periods
        for period, lspval in zip(sortedlspperiods, sortedlspvals):

            if peakcount == nbestpeaks:
                break
            perioddiff = abs(period - prevperiod)
            bestperiodsdiff = [abs(period - x) for x in nbestperiods]

            # print('prevperiod = %s, thisperiod = %s, '
            #       'perioddiff = %s, peakcount = %s' %
            #       (prevperiod, period, perioddiff, peakcount))

            # this ensures that this period is different from the last period
            # and from all the other existing best periods by periodepsilon to
            # make sure we jump to an entire different peak in the periodogram
            if (perioddiff > periodepsilon
                    and all(x > periodepsilon for x in bestperiodsdiff)):
                nbestperiods.append(period)
                nbestlspvals.append(lspval)
                peakcount = peakcount + 1

            prevperiod = period

        return {
            'bestperiod': finperiods[bestperiodind],
            'bestlspval': finlsp[bestperiodind],
            'nbestpeaks': nbestpeaks,
            'nbestlspvals': nbestlspvals,
            'nbestperiods': nbestperiods,
            'lspvals': lsp,
            'omegas': omegas,
            'periods': periods,
            'method': 'sls'
        }

    else:

        LOGERROR('no good detections for these times and mags, skipping...')
        return {
            'bestperiod': npnan,
            'bestlspval': npnan,
            'nbestpeaks': nbestpeaks,
            'nbestlspvals': None,
            'nbestperiods': None,
            'lspvals': None,
            'periods': None,
            'method': 'sls'
        }
Exemple #21
0
def bls_parallel_pfind(
    times,
    mags,
    errs,
    magsarefluxes=False,
    startp=0.1,  # by default, search from 0.1 d to...
    endp=100.0,  # ... 100.0 d -- don't search full timebase
    stepsize=1.0e-4,
    mintransitduration=0.01,  # minimum transit length in phase
    maxtransitduration=0.4,  # maximum transit length in phase
    ndurations=100,
    autofreq=True,  # figure out f0, nf, and df automatically
    blsobjective='likelihood',
    blsmethod='fast',
    blsoversample=5,
    blsmintransits=3,
    blsfreqfactor=10.0,
    nbestpeaks=5,
    periodepsilon=0.1,  # 0.1
    sigclip=10.0,
    endp_timebase_check=True,
    verbose=True,
    nworkers=None,
):
    '''Runs the Box Least Squares Fitting Search for transit-shaped signals.

    Breaks up the full frequency space into chunks and passes them to parallel
    BLS workers.

    Based on the version of BLS in Astropy 3.1:
    `astropy.stats.BoxLeastSquares`. If you don't have Astropy 3.1, this module
    will fail to import. Note that by default, this implementation of
    `bls_parallel_pfind` doesn't use the `.autoperiod()` function from
    `BoxLeastSquares` but uses the same auto frequency-grid generation as the
    functions in `periodbase.kbls`. If you want to use Astropy's implementation,
    set the value of `autofreq` kwarg to 'astropy'. The generated period array
    will then be broken up into chunks and sent to the individual workers.

    NOTE: the combined BLS spectrum produced by this function is not identical
    to that produced by running BLS in one shot for the entire frequency
    space. There are differences on the order of 1.0e-3 or so in the respective
    peak values, but peaks appear at the same frequencies for both methods. This
    is likely due to different aliasing caused by smaller chunks of the
    frequency space used by the parallel workers in this function. When in
    doubt, confirm results for this parallel implementation by comparing to
    those from the serial implementation above.

    In particular, when you want to get reliable estimates of the SNR, transit
    depth, duration, etc. that Astropy's BLS gives you, rerun `bls_serial_pfind`
    with `startp`, and `endp` close to the best period you want to characterize
    the transit at. The dict returned from that function contains a `blsmodel`
    key, which is the generated model from Astropy's BLS. Use the
    `.compute_stats()` method to calculate the required stats.

    Parameters
    ----------

    times,mags,errs : np.array
        The magnitude/flux time-series to search for transits.

    magsarefluxes : bool
        If the input measurement values in `mags` and `errs` are in fluxes, set
        this to True.

    startp,endp : float
        The minimum and maximum periods to consider for the transit search.

    stepsize : float
        The step-size in frequency to use when constructing a frequency grid for
        the period search.

    mintransitduration,maxtransitduration : float
        The minimum and maximum transitdurations (in units of phase) to consider
        for the transit search.

    ndurations : int
        The number of transit durations to use in the period-search.

    autofreq : bool or str
        If this is True, the values of `stepsize` and `nphasebins` will be
        ignored, and these, along with a frequency-grid, will be determined
        based on the following relations::

            nphasebins = int(ceil(2.0/mintransitduration))
            if nphasebins > 3000:
                nphasebins = 3000

            stepsize = 0.25*mintransitduration/(times.max()-times.min())

            minfreq = 1.0/endp
            maxfreq = 1.0/startp
            nfreq = int(ceil((maxfreq - minfreq)/stepsize))

        If this is False, you must set `startp`, `endp`, and `stepsize` as
        appropriate.

        If this is str == 'astropy', will use the
        `astropy.stats.BoxLeastSquares.autoperiod()` function to calculate the
        frequency grid instead of the kbls method.

    blsobjective : {'likelihood','snr'}
        Sets the type of objective to optimize in the `BoxLeastSquares.power()`
        function.

    blsmethod : {'fast','slow'}
        Sets the type of method to use in the `BoxLeastSquares.power()`
        function.

    blsoversample : {'likelihood','snr'}
        Sets the `oversample` kwarg for the `BoxLeastSquares.power()` function.

    blsmintransits : int
        Sets the `min_n_transits` kwarg for the `BoxLeastSquares.autoperiod()`
        function.

    blsfreqfactor : float
        Sets the `frequency_factor` kwarg for the `BoxLeastSquares.autoperiod()`
        function.

    periodepsilon : float
        The fractional difference between successive values of 'best' periods
        when sorting by periodogram power to consider them as separate periods
        (as opposed to part of the same periodogram peak). This is used to avoid
        broad peaks in the periodogram and make sure the 'best' periods returned
        are all actually independent.

    nbestpeaks : int
        The number of 'best' peaks to return from the periodogram results,
        starting from the global maximum of the periodogram peak values.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed using
        the number provided as the sigma-multiplier to cut out from the input
        time-series.

        If a list of two ints/floats is provided, the function will perform an
        'asymmetric' sigma-clip. The first element in this list is the sigma
        value to use for fainter flux/mag values; the second element in this
        list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    endp_timebase_check : bool
        If True, will check if the ``endp`` value is larger than the time-base
        of the observations. If it is, will change the ``endp`` value such that
        it is half of the time-base. If False, will allow an ``endp`` larger
        than the time-base of the observations.

    verbose : bool
        If this is True, will indicate progress and details about the frequency
        grid used for the period search.

    nworkers : int or None
        The number of parallel workers to launch for period-search. If None,
        nworkers = NCPUS.

    Returns
    -------

    dict
        This function returns a dict, referred to as an `lspinfo` dict in other
        astrobase functions that operate on periodogram results. This is a
        standardized format across all astrobase period-finders, and is of the
        form below::

            {'bestperiod': the best period value in the periodogram,
             'bestlspval': the periodogram peak associated with the best period,
             'nbestpeaks': the input value of nbestpeaks,
             'nbestlspvals': nbestpeaks-size list of best period peak values,
             'nbestperiods': nbestpeaks-size list of best periods,
             'lspvals': the full array of periodogram powers,
             'frequencies': the full array of frequencies considered,
             'periods': the full array of periods considered,
             'durations': the array of durations used to run BLS,
             'blsresult': Astropy BLS result object (BoxLeastSquaresResult),
             'blsmodel': Astropy BLS BoxLeastSquares object used for work,
             'stepsize': the actual stepsize used,
             'nfreq': the actual nfreq used,
             'durations': the durations array used,
             'mintransitduration': the input mintransitduration,
             'maxtransitduration': the input maxtransitdurations,
             'method':'bls' -> the name of the period-finder method,
             'kwargs':{ dict of all of the input kwargs for record-keeping}}

    '''

    # get rid of nans first and sigclip
    stimes, smags, serrs = sigclip_magseries(times,
                                             mags,
                                             errs,
                                             magsarefluxes=magsarefluxes,
                                             sigclip=sigclip)

    # make sure there are enough points to calculate a spectrum
    if len(stimes) > 9 and len(smags) > 9 and len(serrs) > 9:

        # if we're setting up everything automatically
        if isinstance(autofreq, bool) and autofreq:

            # use heuristic to figure out best timestep
            stepsize = 0.25 * mintransitduration / (stimes.max() -
                                                    stimes.min())

            # now figure out the frequencies to use
            minfreq = 1.0 / endp
            maxfreq = 1.0 / startp
            nfreq = int(npceil((maxfreq - minfreq) / stepsize))

            # say what we're using
            if verbose:
                LOGINFO('min P: %s, max P: %s, nfreq: %s, '
                        'minfreq: %s, maxfreq: %s' %
                        (startp, endp, nfreq, minfreq, maxfreq))
                LOGINFO('autofreq = True: using AUTOMATIC values for '
                        'freq stepsize: %s, ndurations: %s, '
                        'min transit duration: %s, max transit duration: %s' %
                        (stepsize, ndurations, mintransitduration,
                         maxtransitduration))

            use_autoperiod = False

        elif isinstance(autofreq, bool) and not autofreq:

            minfreq = 1.0 / endp
            maxfreq = 1.0 / startp
            nfreq = int(npceil((maxfreq - minfreq) / stepsize))

            # say what we're using
            if verbose:
                LOGINFO('min P: %s, max P: %s, nfreq: %s, '
                        'minfreq: %s, maxfreq: %s' %
                        (startp, endp, nfreq, minfreq, maxfreq))
                LOGINFO('autofreq = False: using PROVIDED values for '
                        'freq stepsize: %s, ndurations: %s, '
                        'min transit duration: %s, max transit duration: %s' %
                        (stepsize, ndurations, mintransitduration,
                         maxtransitduration))

            use_autoperiod = False

        elif isinstance(autofreq, str) and autofreq == 'astropy':

            use_autoperiod = True
            minfreq = 1.0 / endp
            maxfreq = 1.0 / startp

        else:

            LOGERROR("unknown autofreq kwarg encountered. can't continue...")
            return None

        # check the minimum frequency
        if ((minfreq < (1.0 / (stimes.max() - stimes.min())))
                and endp_timebase_check):

            LOGWARNING('the requested max P = %.3f is larger than '
                       'the time base of the observations = %.3f, '
                       ' will make minfreq = 2 x 1/timebase' %
                       (endp, stimes.max() - stimes.min()))
            minfreq = 2.0 / (stimes.max() - stimes.min())
            LOGWARNING('new minfreq: %s, maxfreq: %s' % (minfreq, maxfreq))

        #############################
        ## NOW RUN BLS IN PARALLEL ##
        #############################

        # fix number of CPUs if needed
        if not nworkers or nworkers > NCPUS:
            nworkers = NCPUS
            if verbose:
                LOGINFO('using %s workers...' % nworkers)

        # check if autoperiod is True and get the correct period-grid
        if use_autoperiod:

            # astropy's BLS requires durations in units of time
            durations = nplinspace(mintransitduration * startp,
                                   maxtransitduration * startp, ndurations)

            # set up the correct units for the BLS model
            if magsarefluxes:

                blsmodel = BoxLeastSquares(stimes * u.day,
                                           smags * u.dimensionless_unscaled,
                                           dy=serrs * u.dimensionless_unscaled)

            else:

                blsmodel = BoxLeastSquares(stimes * u.day,
                                           smags * u.mag,
                                           dy=serrs * u.mag)

            periods = nparray(
                blsmodel.autoperiod(durations * u.day,
                                    minimum_period=startp,
                                    maximum_period=endp,
                                    minimum_n_transit=blsmintransits,
                                    frequency_factor=blsfreqfactor))

            frequencies = 1.0 / periods
            nfreq = frequencies.size

            if verbose:
                LOGINFO("autofreq = 'astropy', used .autoperiod() with "
                        "minimum_n_transit = %s, freq_factor = %s "
                        "to generate the frequency grid" %
                        (blsmintransits, blsfreqfactor))
                LOGINFO('stepsize = %s, nfreq = %s, minfreq = %.5f, '
                        'maxfreq = %.5f, ndurations = %s' %
                        (abs(frequencies[1] - frequencies[0]), nfreq, 1.0 /
                         periods.max(), 1.0 / periods.min(), durations.size))

            del blsmodel
            del durations

        # otherwise, use kbls method
        else:

            frequencies = minfreq + nparange(nfreq) * stepsize

        # break up the tasks into chunks
        csrem = int(fmod(nfreq, nworkers))
        csint = int(float(nfreq / nworkers))
        chunk_minfreqs, chunk_nfreqs = [], []

        for x in range(nworkers):

            this_minfreqs = frequencies[x * csint]

            # handle usual nfreqs
            if x < (nworkers - 1):
                this_nfreqs = frequencies[x * csint:x * csint + csint].size
            else:
                this_nfreqs = frequencies[x * csint:x * csint + csint +
                                          csrem].size

            chunk_minfreqs.append(this_minfreqs)
            chunk_nfreqs.append(this_nfreqs)

        # populate the tasks list
        #
        # task[0] = times
        # task[1] = mags
        # task[2] = errs
        # task[3] = magsarefluxes

        # task[4] = minfreq
        # task[5] = nfreq
        # task[6] = stepsize

        # task[7] = nphasebins
        # task[8] = mintransitduration
        # task[9] = maxtransitduration

        # task[10] = blsobjective
        # task[11] = blsmethod
        # task[12] = blsoversample

        # populate the tasks list
        tasks = [(stimes, smags, serrs, magsarefluxes, chunk_minf, chunk_nf,
                  stepsize, ndurations, mintransitduration, maxtransitduration,
                  blsobjective, blsmethod, blsoversample)
                 for (chunk_minf,
                      chunk_nf) in zip(chunk_minfreqs, chunk_nfreqs)]

        if verbose:
            for ind, task in enumerate(tasks):
                LOGINFO('worker %s: minfreq = %.6f, nfreqs = %s' %
                        (ind + 1, task[4], task[5]))
            LOGINFO('running...')

        # return tasks

        # start the pool
        pool = Pool(nworkers)
        results = pool.map(_parallel_bls_worker, tasks)

        pool.close()
        pool.join()
        del pool

        # now concatenate the output lsp arrays
        lsp = npconcatenate([x['power'] for x in results])
        periods = 1.0 / frequencies

        # find the nbestpeaks for the periodogram: 1. sort the lsp array
        # by highest value first 2. go down the values until we find
        # five values that are separated by at least periodepsilon in
        # period
        # make sure to get only the finite peaks in the periodogram
        # this is needed because BLS may produce infs for some peaks
        finitepeakind = npisfinite(lsp)
        finlsp = lsp[finitepeakind]
        finperiods = periods[finitepeakind]

        # make sure that finlsp has finite values before we work on it
        try:

            bestperiodind = npargmax(finlsp)

        except ValueError:

            LOGERROR('no finite periodogram values '
                     'for this mag series, skipping...')

            return {
                'bestperiod': npnan,
                'bestlspval': npnan,
                'nbestpeaks': nbestpeaks,
                'nbestinds': None,
                'nbestlspvals': None,
                'nbestperiods': None,
                'lspvals': None,
                'periods': None,
                'durations': None,
                'method': 'bls',
                'blsresult': None,
                'blsmodel': None,
                'kwargs': {
                    'startp': startp,
                    'endp': endp,
                    'stepsize': stepsize,
                    'mintransitduration': mintransitduration,
                    'maxtransitduration': maxtransitduration,
                    'ndurations': ndurations,
                    'blsobjective': blsobjective,
                    'blsmethod': blsmethod,
                    'blsoversample': blsoversample,
                    'autofreq': autofreq,
                    'periodepsilon': periodepsilon,
                    'nbestpeaks': nbestpeaks,
                    'sigclip': sigclip,
                    'magsarefluxes': magsarefluxes
                }
            }

        sortedlspind = npargsort(finlsp)[::-1]
        sortedlspperiods = finperiods[sortedlspind]
        sortedlspvals = finlsp[sortedlspind]

        # now get the nbestpeaks
        nbestperiods, nbestlspvals, nbestinds, peakcount = ([
            finperiods[bestperiodind]
        ], [finlsp[bestperiodind]], [bestperiodind], 1)
        prevperiod = sortedlspperiods[0]

        # find the best nbestpeaks in the lsp and their periods
        for period, lspval, ind in zip(sortedlspperiods, sortedlspvals,
                                       sortedlspind):

            if peakcount == nbestpeaks:
                break
            perioddiff = abs(period - prevperiod)
            bestperiodsdiff = [abs(period - x) for x in nbestperiods]

            # this ensures that this period is different from the last
            # period and from all the other existing best periods by
            # periodepsilon to make sure we jump to an entire different
            # peak in the periodogram
            if (perioddiff > (periodepsilon * prevperiod)
                    and all(x > (periodepsilon * period)
                            for x in bestperiodsdiff)):
                nbestperiods.append(period)
                nbestlspvals.append(lspval)
                nbestinds.append(ind)
                peakcount = peakcount + 1

            prevperiod = period

        # generate the return dict
        resultdict = {
            'bestperiod': finperiods[bestperiodind],
            'bestlspval': finlsp[bestperiodind],
            'nbestpeaks': nbestpeaks,
            'nbestinds': nbestinds,
            'nbestlspvals': nbestlspvals,
            'nbestperiods': nbestperiods,
            'lspvals': lsp,
            'frequencies': frequencies,
            'periods': periods,
            'durations': [x['durations'] for x in results],
            'blsresult': [x['blsresult'] for x in results],
            'blsmodel': [x['blsmodel'] for x in results],
            'stepsize': stepsize,
            'nfreq': nfreq,
            'mintransitduration': mintransitduration,
            'maxtransitduration': maxtransitduration,
            'method': 'bls',
            'kwargs': {
                'startp': startp,
                'endp': endp,
                'stepsize': stepsize,
                'mintransitduration': mintransitduration,
                'maxtransitduration': maxtransitduration,
                'ndurations': ndurations,
                'blsobjective': blsobjective,
                'blsmethod': blsmethod,
                'blsoversample': blsoversample,
                'autofreq': autofreq,
                'periodepsilon': periodepsilon,
                'nbestpeaks': nbestpeaks,
                'sigclip': sigclip,
                'magsarefluxes': magsarefluxes
            }
        }

        return resultdict

    else:

        LOGERROR('no good detections for these times and mags, skipping...')
        return {
            'bestperiod': npnan,
            'bestlspval': npnan,
            'nbestinds': None,
            'nbestpeaks': nbestpeaks,
            'nbestlspvals': None,
            'nbestperiods': None,
            'lspvals': None,
            'periods': None,
            'durations': None,
            'blsresult': None,
            'blsmodel': None,
            'stepsize': stepsize,
            'nfreq': None,
            'nphasebins': None,
            'mintransitduration': mintransitduration,
            'maxtransitduration': maxtransitduration,
            'method': 'bls',
            'kwargs': {
                'startp': startp,
                'endp': endp,
                'stepsize': stepsize,
                'mintransitduration': mintransitduration,
                'maxtransitduration': maxtransitduration,
                'ndurations': ndurations,
                'blsobjective': blsobjective,
                'blsmethod': blsmethod,
                'blsoversample': blsoversample,
                'autofreq': autofreq,
                'periodepsilon': periodepsilon,
                'nbestpeaks': nbestpeaks,
                'sigclip': sigclip,
                'magsarefluxes': magsarefluxes
            }
        }
Exemple #22
0
def sigclip_magseries_with_extparams(times,
                                     mags,
                                     errs,
                                     extparams,
                                     sigclip=None,
                                     iterative=False,
                                     magsarefluxes=False):
    '''Select the finite times, magnitudes (or fluxes), and errors from the passed
    values, and apply symmetric or asymmetric sigma clipping to them.  Returns
    sigma-clipped times, mags, and errs. Uses the same indices to filter out the
    values of all arrays in the extparams list.

    Args:
        times (np.array): ...

        mags (np.array): numpy array to sigma-clip. Does not assume all values
        are finite. Does not assume anything about whether they're
        positive/negative.

        errs (np.array): ...

        extparams (list of np.arrays): external parameters to also filter using
        the same indices as used for sigma-clipping.

        iterative (bool): True if you want iterative sigma-clipping.

        magsarefluxes (bool): True if your "mags" are in fact fluxes, i.e. if
        "dimming" corresponds to your "mags" getting smaller.

        sigclip (float or list): If float, apply symmetric sigma clipping. If
        list, e.g., [10., 3.], will sigclip out greater than 10-sigma dimmings
        and greater than 3-sigma brightenings. Here the meaning of "dimming"
        and "brightening" is set by *physics* (not the magnitude system), which
        is why the `magsarefluxes` kwarg must be correctly set.

    Returns:
        stimes, smags, serrs: (sigmaclipped values of each).

    '''

    returnerrs = True

    # fake the errors if they don't exist
    # this is inconsequential to sigma-clipping
    # we don't return these dummy values if the input errs are None
    if errs is None:
        # assume 0.1% errors if not given
        # this should work for mags and fluxes
        errs = 0.001 * mags
        returnerrs = False

    # filter the input times, mags, errs; do sigclipping and normalization
    find = npisfinite(times) & npisfinite(mags) & npisfinite(errs)
    ftimes, fmags, ferrs = times[find], mags[find], errs[find]

    # apply the same indices to the external parameters
    for epi, eparr in enumerate(extparams):
        extparams[epi] = eparr[find]

    # get the median and stdev = 1.483 x MAD
    median_mag = npmedian(fmags)
    stddev_mag = (npmedian(npabs(fmags - median_mag))) * 1.483

    # sigclip next for a single sigclip value
    if sigclip and isinstance(sigclip, (float, int)):

        if not iterative:

            sigind = (npabs(fmags - median_mag)) < (sigclip * stddev_mag)

            stimes = ftimes[sigind]
            smags = fmags[sigind]
            serrs = ferrs[sigind]

            # apply the same indices to the external parameters
            for epi, eparr in enumerate(extparams):
                extparams[epi] = eparr[sigind]

        else:

            #
            # iterative version adapted from scipy.stats.sigmaclip
            #
            delta = 1

            this_times = ftimes
            this_mags = fmags
            this_errs = ferrs

            while delta:

                this_median = npmedian(this_mags)
                this_stdev = (npmedian(npabs(this_mags - this_median))) * 1.483
                this_size = this_mags.size

                # apply the sigclip
                tsi = (npabs(this_mags - this_median)) < (sigclip * this_stdev)

                # update the arrays
                this_times = this_times[tsi]
                this_mags = this_mags[tsi]
                this_errs = this_errs[tsi]

                # apply the same indices to the external parameters
                for epi, eparr in enumerate(extparams):
                    extparams[epi] = eparr[tsi]

                # update delta and go to the top of the loop
                delta = this_size - this_mags.size

            # final sigclipped versions
            stimes, smags, serrs = this_times, this_mags, this_errs

    # this handles sigclipping for asymmetric +ve and -ve clip values
    elif sigclip and isinstance(sigclip, list) and len(sigclip) == 2:

        # sigclip is passed as [dimmingclip, brighteningclip]
        dimmingclip = sigclip[0]
        brighteningclip = sigclip[1]

        if not iterative:

            if magsarefluxes:
                nottoodimind = ((fmags - median_mag) >
                                (-dimmingclip * stddev_mag))
                nottoobrightind = ((fmags - median_mag) <
                                   (brighteningclip * stddev_mag))
            else:
                nottoodimind = ((fmags - median_mag) <
                                (dimmingclip * stddev_mag))
                nottoobrightind = ((fmags - median_mag) >
                                   (-brighteningclip * stddev_mag))

            sigind = nottoodimind & nottoobrightind

            stimes = ftimes[sigind]
            smags = fmags[sigind]
            serrs = ferrs[sigind]

            # apply the same indices to the external parameters
            for epi, eparr in enumerate(extparams):
                extparams[epi] = eparr[sigind]

        else:

            #
            # iterative version adapted from scipy.stats.sigmaclip
            #
            delta = 1

            this_times = ftimes
            this_mags = fmags
            this_errs = ferrs

            while delta:

                this_median = npmedian(this_mags)
                this_stdev = (npmedian(npabs(this_mags - this_median))) * 1.483
                this_size = this_mags.size

                if magsarefluxes:
                    nottoodimind = ((this_mags - this_median) >
                                    (-dimmingclip * this_stdev))
                    nottoobrightind = ((this_mags - this_median) <
                                       (brighteningclip * this_stdev))
                else:
                    nottoodimind = ((this_mags - this_median) <
                                    (dimmingclip * this_stdev))
                    nottoobrightind = ((this_mags - this_median) >
                                       (-brighteningclip * this_stdev))

                # apply the sigclip
                tsi = nottoodimind & nottoobrightind

                # update the arrays
                this_times = this_times[tsi]
                this_mags = this_mags[tsi]
                this_errs = this_errs[tsi]

                # apply the same indices to the external parameters
                for epi, eparr in enumerate(extparams):
                    extparams[epi] = eparr[tsi]

                # update delta and go to top of the loop
                delta = this_size - this_mags.size

            # final sigclipped versions
            stimes, smags, serrs = this_times, this_mags, this_errs

    else:

        stimes = ftimes
        smags = fmags
        serrs = ferrs

    if returnerrs:
        return stimes, smags, serrs, extparams
    else:
        return stimes, smags, None, extparams
Exemple #23
0
def macf_period_find(
        times,
        mags,
        errs,
        fillgaps=0.0,
        filterwindow=11,
        forcetimebin=None,
        maxlags=None,
        maxacfpeaks=10,
        smoothacf=21,  # set for Kepler-type LCs, see details below
        smoothfunc=_smooth_acf_savgol,
        smoothfunckwargs=None,
        magsarefluxes=False,
        sigclip=3.0,
        verbose=True,
        periodepsilon=0.1,  # doesn't do anything, for consistent external API
        nworkers=None,  # doesn't do anything, for consistent external API
        startp=None,  # doesn't do anything, for consistent external API
        endp=None,  # doesn't do anything, for consistent external API
        autofreq=None,  # doesn't do anything, for consistent external API
        stepsize=None,  # doesn't do anything, for consistent external API
):
    '''This finds periods using the McQuillan+ (2013a, 2014) ACF method.

    The kwargs from `periodepsilon` to `stepsize` don't do anything but are used
    to present a consistent API for all periodbase period-finders to an outside
    driver (e.g. the one in the checkplotserver).

    Parameters
    ----------

    times,mags,errs : np.array
        The input magnitude/flux time-series to run the period-finding for.

    fillgaps : 'noiselevel' or float
        This sets what to use to fill in gaps in the time series. If this is
        'noiselevel', will smooth the light curve using a point window size of
        `filterwindow` (this should be an odd integer), subtract the smoothed LC
        from the actual LC and estimate the RMS. This RMS will be used to fill
        in the gaps. Other useful values here are 0.0, and npnan.

    filterwindow : int
        The light curve's smoothing filter window size to use if
        `fillgaps='noiselevel`'.

    forcetimebin : None or float
        This is used to force a particular cadence in the light curve other than
        the automatically determined cadence. This effectively rebins the light
        curve to this cadence. This should be in the same time units as `times`.

    maxlags : None or int
        This is the maximum number of lags to calculate. If None, will calculate
        all lags.

    maxacfpeaks : int
        This is the maximum number of ACF peaks to use when finding the highest
        peak and obtaining a fit period.

    smoothacf : int
        This is the number of points to use as the window size when smoothing
        the ACF with the `smoothfunc`. This should be an odd integer value. If
        this is None, will not smooth the ACF, but this will probably lead to
        finding spurious peaks in a generally noisy ACF.

        For Kepler, a value between 21 and 51 seems to work fine. For ground
        based data, much larger values may be necessary: between 1001 and 2001
        seem to work best for the HAT surveys. This is dependent on cadence, RMS
        of the light curve, the periods of the objects you're looking for, and
        finally, any correlated noise in the light curve. Make a plot of the
        smoothed/unsmoothed ACF vs. lag using the result dict of this function
        and the `plot_acf_results` function above to see the identified ACF
        peaks and what kind of smoothing might be needed.

        The value of `smoothacf` will also be used to figure out the interval to
        use when searching for local peaks in the ACF: this interval is 1/2 of
        the `smoothacf` value.

    smoothfunc : Python function
        This is the function that will be used to smooth the ACF. This should
        take at least one kwarg: 'windowsize'. Other kwargs can be passed in
        using a dict provided in `smoothfunckwargs`. By default, this uses a
        Savitsky-Golay filter, a Gaussian filter is also provided but not
        used. Another good option would be an actual low-pass filter (generated
        using scipy.signal?) to remove all high frequency noise from the ACF.

    smoothfunckwargs : dict or None
        The dict of optional kwargs to pass in to the `smoothfunc`.

    magsarefluxes : bool
        If your input measurements in `mags` are actually fluxes instead of
        mags, set this is True.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed using
        the number provided as the sigma-multiplier to cut out from the input
        time-series.

        If a list of two ints/floats is provided, the function will perform an
        'asymmetric' sigma-clip. The first element in this list is the sigma
        value to use for fainter flux/mag values; the second element in this
        list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    verbose : bool
        If True, will indicate progress and report errors.

    Returns
    -------

    dict
        Returns a dict with results. dict['bestperiod'] is the estimated best
        period and dict['fitperiodrms'] is its estimated error. Other
        interesting things in the output include:

        - dict['acfresults']: all results from calculating the ACF. in
          particular, the unsmoothed ACF might be of interest:
          dict['acfresults']['acf'] and dict['acfresults']['lags'].

        - dict['lags'] and dict['acf'] contain the ACF after smoothing was
          applied.

        - dict['periods'] and dict['lspvals'] can be used to construct a
          pseudo-periodogram.

        - dict['naivebestperiod'] is obtained by multiplying the lag at the
          highest ACF peak with the cadence. This is usually close to the fit
          period (dict['fitbestperiod']), which is calculated by doing a fit to
          the lags vs. peak index relation as in McQuillan+ 2014.

    '''

    # get the ACF
    acfres = autocorr_magseries(times,
                                mags,
                                errs,
                                maxlags=maxlags,
                                fillgaps=fillgaps,
                                forcetimebin=forcetimebin,
                                sigclip=sigclip,
                                magsarefluxes=magsarefluxes,
                                filterwindow=filterwindow,
                                verbose=verbose)

    xlags = acfres['lags']

    # smooth the ACF if requested
    if smoothacf and isinstance(smoothacf, int) and smoothacf > 0:

        if smoothfunckwargs is None:
            sfkwargs = {'windowsize': smoothacf}
        else:
            sfkwargs = smoothfunckwargs.copy()
            sfkwargs.update({'windowsize': smoothacf})

        xacf = smoothfunc(acfres['acf'], **sfkwargs)

    else:

        xacf = acfres['acf']

    # get the relative peak heights and fit best lag
    peakres = _get_acf_peakheights(xlags,
                                   xacf,
                                   npeaks=maxacfpeaks,
                                   searchinterval=int(smoothacf / 2))

    # this is the best period's best ACF peak height
    bestlspval = peakres['bestpeakheight']

    try:

        # get the fit best lag from a linear fit to the peak index vs time(peak
        # lag) function as in McQillian+ (2014)
        fity = npconcatenate(([
            0.0, peakres['bestlag']
        ], peakres['relpeaklags'][peakres['relpeaklags'] > peakres['bestlag']]
                              ))
        fity = fity * acfres['cadence']
        fitx = nparange(fity.size)

        fitcoeffs, fitcovar = nppolyfit(fitx, fity, 1, cov=True)

        # fit best period is the gradient of fit
        fitbestperiod = fitcoeffs[0]
        bestperiodrms = npsqrt(fitcovar[0, 0])  # from the covariance matrix

    except Exception as e:

        LOGWARNING('linear fit to time at each peak lag '
                   'value vs. peak number failed, '
                   'naively calculated ACF period may not be accurate')
        fitcoeffs = nparray([npnan, npnan])
        fitcovar = nparray([[npnan, npnan], [npnan, npnan]])
        fitbestperiod = npnan
        bestperiodrms = npnan
        raise

    # calculate the naive best period using delta_tau = lag * cadence
    naivebestperiod = peakres['bestlag'] * acfres['cadence']

    if fitbestperiod < naivebestperiod:
        LOGWARNING('fit bestperiod = %.5f may be an alias, '
                   'naively calculated bestperiod is = %.5f' %
                   (fitbestperiod, naivebestperiod))

    if npisfinite(fitbestperiod):
        bestperiod = fitbestperiod
    else:
        bestperiod = naivebestperiod

    return {
        'bestperiod':
        bestperiod,
        'bestlspval':
        bestlspval,
        'nbestpeaks':
        maxacfpeaks,
        # for compliance with the common pfmethod API
        'nbestperiods':
        npconcatenate([[fitbestperiod], peakres['relpeaklags'][1:maxacfpeaks] *
                       acfres['cadence']]),
        'nbestlspvals':
        peakres['maxacfs'][:maxacfpeaks],
        'lspvals':
        xacf,
        'periods':
        xlags * acfres['cadence'],
        'acf':
        xacf,
        'lags':
        xlags,
        'method':
        'acf',
        'naivebestperiod':
        naivebestperiod,
        'fitbestperiod':
        fitbestperiod,
        'fitperiodrms':
        bestperiodrms,
        'periodfitcoeffs':
        fitcoeffs,
        'periodfitcovar':
        fitcovar,
        'kwargs': {
            'maxlags': maxlags,
            'maxacfpeaks': maxacfpeaks,
            'fillgaps': fillgaps,
            'filterwindow': filterwindow,
            'smoothacf': smoothacf,
            'smoothfunckwargs': sfkwargs,
            'magsarefluxes': magsarefluxes,
            'sigclip': sigclip
        },
        'acfresults':
        acfres,
        'acfpeaks':
        peakres
    }