Example #1
0
def fillcollection(e_photon=600., nphotos=10, nvalence=1, nsigstars=10, npistars=20):
    """Build a shuffled array of positive energies drawn from four sample groups.

    Each group is computed as ``offset + a - samplegamma(a=a, ...)`` and only
    its strictly positive entries are kept. The groups are appended in the
    order: photo (``nphotos`` draws), valence (``nvalence`` draws), sigstar
    (``nsigstars`` draws), pistar (``npistars`` draws); the combined array is
    shuffled in place before being returned.

    Parameters
    ----------
    e_photon : float
        Energy from which the photo and valence offsets are measured.
    nphotos, nvalence, nsigstars, npistars : int
        Number of draws requested from ``samplegamma`` for each group.

    Returns
    -------
    The concatenated, shuffled array of the retained (positive) values.

    NOTE(review): ``samplegamma`` is defined elsewhere; it is assumed to return
    an iterable of ``n`` numeric samples that supports array arithmetic.
    """
    def _positive_only(energies):
        # element-wise filter: keep strictly positive entries, as an array
        return nparray([val for val in energies if val > 0])

    c, loc = 1., 0.

    # --- photo group: e_photon - 540 + a - gamma sample
    ph_a, ph_scale, ph_ip = 2., 1., 540.
    photo = e_photon - ph_ip + ph_a - samplegamma(a=ph_a, c=c, loc=loc, scale=ph_scale, n=nphotos)
    collected = _positive_only(photo)

    # --- valence group: e_photon - 22 + a - gamma sample
    v_ip, v_scale, v_a = 22., 1., 2.
    valence = e_photon - v_ip + v_a - samplegamma(a=v_a, c=c, loc=0, scale=v_scale, n=nvalence)
    collected = npconcatenate((collected, _positive_only(valence)))

    # --- sigstar group: fixed energy 542, independent of e_photon
    sigstar_a, sigstar_e, sigstar_scale = 5., 542., 0.5
    sigstar = sigstar_e + sigstar_a - samplegamma(a=sigstar_a, c=1., loc=0, scale=sigstar_scale, n=nsigstars)
    collected = npconcatenate((collected, _positive_only(sigstar)))

    # --- pistar group: fixed energy 532, independent of e_photon
    pistar_a, pistar_e, pistar_scale = 2., 532., 0.5
    pistar = pistar_e + pistar_a - samplegamma(a=pistar_a, c=1., loc=0, scale=pistar_scale, n=npistars)
    collected = npconcatenate((collected, _positive_only(pistar)))

    shuffle(collected)
    return collected
def test_concatenate():
    """Check that ``concatenate`` agrees with numpy's on the default axis."""
    from numpy import concatenate as npconcatenate

    # 2 x 3 x 4 block of consecutive integers
    base = arange(2 * 3 * 4).reshape((2, 3, 4))
    joined = concatenate((base, base))
    expected = npconcatenate((base, base))
    assert allclose(expected, joined.toarray())
Example #3
0
def read_knmi_data(reference_station):
    '''
    Load and merge all KNMI hourly data archives of one reference station.

    Every file matching ``KNMI/uurgeg_<reference_station>*.zip`` is read with
    ``load_knmi_data`` and the per-file ``csvdata`` dictionaries are merged,
    in sorted filename order, by concatenating the arrays stored under each
    key.

    Parameters
    ----------
    reference_station
        Station identifier; ``str(reference_station)`` is inserted in the
        file name pattern.

    Returns
    -------
    dict
        Mapping of variable name to the concatenated data of all files.
        When no file matches, an empty ``collections.defaultdict(list)`` is
        returned.
    '''
    from load_knmi_data import load_knmi_data
    import glob
    from numpy import sort
    # the merge below uses the name npconcatenate, so import under that alias
    from numpy import concatenate as npconcatenate
    import collections
    # generate the (sorted) filenames of the KNMI station
    filenames = sort(glob.glob('KNMI/uurgeg_' + str(reference_station) + '*.zip'))
    # load all csv files in a list of dictionaries
    dicts = [load_knmi_data(filename).csvdata for filename in filenames]
    # merge all dictionaries in a super dictionary
    knmi_data = collections.defaultdict(list)
    for csvdata in dicts:
        if not knmi_data:
            # nothing accumulated yet: there is nothing to concatenate with
            knmi_data = dict((k, csvdata.get(k)) for k in csvdata.keys())
            continue
        # union of keys via sets: dict views cannot be added on Python 3
        all_keys = set(knmi_data.keys()) | set(csvdata.keys())
        try:
            knmi_data = dict(
                (k, npconcatenate((knmi_data.get(k), csvdata.get(k))))
                for k in all_keys)
        except ValueError:
            # NOTE(review): raised when a key is missing in one of the two
            # dictionaries (``get`` returns None) or the arrays are not
            # compatible. The existing fallback is kept: the accumulated data
            # is replaced by the current file's data.
            knmi_data = dict((k, csvdata.get(k)) for k in csvdata.keys())
    # return dictionary with all variables/time steps
    return knmi_data
Example #4
0
    def concatenate(self, arry, axis=0):
        """
        Concatenate another array onto this one along a given axis.

        Parameters
        ----------
        arry : ndarray, BoltArrayLocal, or BoltArraySpark
            The array to append. An ``ndarray`` instance is first converted
            to a Spark array in this array's context, distributed over the
            leading ``self.split`` axes.

        axis : int, optional, default=0
            The axis along which the arrays are joined.

        Returns
        -------
        BoltArraySpark

        Raises
        ------
        ValueError
            If ``arry`` is neither an ndarray nor a BoltArraySpark, or if
            any dimension other than ``axis`` differs between the arrays.
        NotImplementedError
            If the two arrays do not have the same split.
        """
        if isinstance(arry, ndarray):
            from bolt.spark.construct import ConstructSpark
            split_axes = range(0, self.split)
            arry = ConstructSpark.array(arry, self._rdd.context, axis=split_axes)
        elif not isinstance(arry, BoltArraySpark):
            raise ValueError(
                "other must be local array or spark array, got %s" %
                type(arry))

        # every dimension except the concatenation axis has to agree
        for dim, (own, other) in enumerate(zip(self.shape, arry.shape)):
            if dim != axis and own != other:
                raise ValueError("all the input array dimensions except for "
                                 "the concatenation axis must match exactly")

        if self.split != arry.split:
            raise NotImplementedError("two arrays must have the same split ")

        if axis < self.split:
            # joining along a key axis: offset the other array's keys by the
            # extent of this array's keys, then take the union of records
            key_shape = self.keys.shape

            def shift_key(key):
                shifted = list(key)
                shifted[axis] += key_shape[axis]
                return tuple(shifted)

            rdd = self._rdd.union(
                arry._rdd.map(lambda kv: (shift_key(kv[0]), kv[1])))
        else:
            # joining along a value axis: pair records by key and concatenate
            # the two value blocks along the axis relative to the values
            from numpy import concatenate as npconcatenate
            value_axis = axis - self.split
            rdd = self._rdd.join(arry._rdd).map(
                lambda kv: (kv[0], npconcatenate(kv[1], axis=value_axis)))

        new_shape = tuple(
            own + other if dim == axis else own
            for dim, (own, other) in enumerate(zip(self.shape, arry.shape)))

        return self._constructor(rdd, shape=new_shape).__finalize__(self)
 def diffeomorphism(self, q, q_d):
     """Return ``q`` plus the model's predicted offset, transposed back.

     Both inputs are transposed first. The model input is the transposed
     ``q`` alone, or ``q`` and ``q_d`` joined along axis 1 when
     ``self.traj_input`` is set. The prediction is added to the transposed
     ``q`` and the sum is returned transposed again.
     """
     q = q.transpose()
     q_d = q_d.transpose()
     self.diffeomorphism_model.eval()
     # avoid shadowing the builtin ``input`` for the model input
     model_in = npconcatenate((q, q_d), axis=1) if self.traj_input else q
     diff_pred = self.diffeomorphism_model.predict(from_numpy(model_in))
     return (q + diff_pred).T
Example #6
0
def test_concatenate(sc):
    """Compare ``concatenate`` with numpy's along every axis for two splits."""
    from numpy import concatenate as npconcatenate

    x = arange(2*3*4).reshape((2, 3, 4))
    all_axes = (0, 1, 2)

    # array distributed over the first axis only; built once and reused
    single_split = array(x, sc, axis=0)
    for ax in all_axes:
        joined = concatenate((single_split, single_split), axis=ax)
        assert allclose(npconcatenate((x, x), axis=ax), joined.toarray())

    # array distributed over the first two axes; rebuilt for every axis
    for ax in all_axes:
        double_split = array(x, sc, axis=(0, 1))
        joined = concatenate((double_split, double_split), axis=ax)
        assert allclose(npconcatenate((x, x), axis=ax), joined.toarray())
Example #7
0
    def concatenate(self, arry, axis=0):
        """
        Join this array with another array.

        Parameters
        ----------
        arry : ndarray, BoltArrayLocal, or BoltArraySpark
            Another array to concatenate with. An ``ndarray`` instance is
            turned into a Spark array (same context, split over the first
            ``self.split`` axes) before joining.

        axis : int, optional, default=0
            The axis along which arrays will be joined.

        Returns
        -------
        BoltArraySpark

        Raises
        ------
        ValueError
            When ``arry`` has an unsupported type, or when the shapes differ
            on an axis other than ``axis``.
        NotImplementedError
            When the two arrays have different splits.
        """
        if not isinstance(arry, (ndarray, BoltArraySpark)):
            raise ValueError("other must be local array or spark array, got %s" % type(arry))

        if isinstance(arry, ndarray):
            from bolt.spark.construct import ConstructSpark
            arry = ConstructSpark.array(arry, self._rdd.context, axis=range(0, self.split))

        dims = list(enumerate(zip(self.shape, arry.shape)))

        mismatch = any(i != axis and a != b for i, (a, b) in dims)
        if mismatch:
            raise ValueError("all the input array dimensions except for "
                             "the concatenation axis must match exactly")

        if self.split != arry.split:
            raise NotImplementedError("two arrays must have the same split ")

        if axis >= self.split:
            # concatenation axis lives in the values: match records by key
            # and glue the paired value blocks together
            from numpy import concatenate as npconcatenate
            shift = axis - self.split

            def glue_values(record):
                return (record[0], npconcatenate(record[1], axis=shift))

            rdd = self._rdd.join(arry._rdd).map(glue_values)
        else:
            # concatenation axis lives in the keys: move the other array's
            # keys past the end of this array's keys and merge the records
            offset = self.keys.shape[axis]

            def move_key(record):
                moved = list(record[0])
                moved[axis] += offset
                return (tuple(moved), record[1])

            rdd = self._rdd.union(arry._rdd.map(move_key))

        joined_shape = tuple([a + b if i == axis else a for i, (a, b) in dims])

        return self._constructor(rdd, shape=joined_shape).__finalize__(self)
Example #8
0
 def read_knmi_data(self, stationid):
     '''
     Load and merge all KNMI hourly data archives of one station.

     Every file in ``self.outputdir`` matching
     ``uurgeg_<first word of stationid>*.zip`` is read with
     ``load_knmi_data`` and the per-file ``csvdata`` dictionaries are merged,
     in sorted filename order, by concatenating the arrays stored under each
     key.

     Parameters
     ----------
     stationid : str
         Station description; only its first whitespace-separated token is
         used in the file name pattern.

     Returns
     -------
     dict
         Mapping of variable name to the concatenated data of all files.
         When no file matches, an empty ``collections.defaultdict(list)`` is
         returned.
     '''
     # generate the (sorted) filenames of the KNMI station
     pattern = os.path.join(self.outputdir,
                            'uurgeg_' + str(stationid.split()[0]) + '*.zip')
     filenames = sort(glob.glob(pattern))
     # load all csv files in a list of dictionaries
     dicts = [load_knmi_data(filename).csvdata for filename in filenames]
     # merge all dictionaries in a super dictionary
     knmi_data = collections.defaultdict(list)
     for csvdata in dicts:
         if not knmi_data:
             # nothing accumulated yet: there is nothing to concatenate with
             knmi_data = dict((k, csvdata.get(k)) for k in csvdata.keys())
             continue
         # union of keys via sets: dict views cannot be added on Python 3
         all_keys = set(knmi_data.keys()) | set(csvdata.keys())
         try:
             knmi_data = dict(
                 (k, npconcatenate((knmi_data.get(k), csvdata.get(k))))
                 for k in all_keys)
         except ValueError:
             # NOTE(review): raised when a key is missing in one of the two
             # dictionaries (``get`` returns None) or the arrays are not
             # compatible. The existing fallback is kept: the accumulated
             # data is replaced by the current file's data.
             knmi_data = dict((k, csvdata.get(k)) for k in csvdata.keys())
     # return dictionary with all variables/time steps
     return knmi_data
Example #9
0
def bls_parallel_pfind(
    times,
    mags,
    errs,
    magsarefluxes=False,
    startp=0.1,  # by default, search from 0.1 d to...
    endp=100.0,  # ... 100.0 d -- don't search full timebase
    stepsize=1.0e-4,
    mintransitduration=0.01,  # minimum transit length in phase
    maxtransitduration=0.4,  # maximum transit length in phase
    ndurations=100,
    autofreq=True,  # figure out f0, nf, and df automatically
    blsobjective='likelihood',
    blsmethod='fast',
    blsoversample=5,
    blsmintransits=3,
    blsfreqfactor=10.0,
    nbestpeaks=5,
    periodepsilon=0.1,  # 0.1
    sigclip=10.0,
    endp_timebase_check=True,
    verbose=True,
    nworkers=None,
):
    '''Runs the Box Least Squares Fitting Search for transit-shaped signals.

    Breaks up the full frequency space into chunks and passes them to parallel
    BLS workers.

    Based on the version of BLS in Astropy 3.1:
    `astropy.stats.BoxLeastSquares`. If you don't have Astropy 3.1, this module
    will fail to import. Note that by default, this implementation of
    `bls_parallel_pfind` doesn't use the `.autoperiod()` function from
    `BoxLeastSquares` but uses the same auto frequency-grid generation as the
    functions in `periodbase.kbls`. If you want to use Astropy's implementation,
    set the value of `autofreq` kwarg to 'astropy'. The generated period array
    will then be broken up into chunks and sent to the individual workers.

    NOTE: the combined BLS spectrum produced by this function is not identical
    to that produced by running BLS in one shot for the entire frequency
    space. There are differences on the order of 1.0e-3 or so in the respective
    peak values, but peaks appear at the same frequencies for both methods. This
    is likely due to different aliasing caused by smaller chunks of the
    frequency space used by the parallel workers in this function. When in
    doubt, confirm results for this parallel implementation by comparing to
    those from the serial implementation above.

    In particular, when you want to get reliable estimates of the SNR, transit
    depth, duration, etc. that Astropy's BLS gives you, rerun `bls_serial_pfind`
    with `startp`, and `endp` close to the best period you want to characterize
    the transit at. The dict returned from that function contains a `blsmodel`
    key, which is the generated model from Astropy's BLS. Use the
    `.compute_stats()` method to calculate the required stats.

    Parameters
    ----------

    times,mags,errs : np.array
        The magnitude/flux time-series to search for transits.

    magsarefluxes : bool
        If the input measurement values in `mags` and `errs` are in fluxes, set
        this to True.

    startp,endp : float
        The minimum and maximum periods to consider for the transit search.

    stepsize : float
        The step-size in frequency to use when constructing a frequency grid for
        the period search.

    mintransitduration,maxtransitduration : float
        The minimum and maximum transitdurations (in units of phase) to consider
        for the transit search.

    ndurations : int
        The number of transit durations to use in the period-search.

    autofreq : bool or str
        If this is True, the value of `stepsize` will be ignored, and it,
        along with a frequency-grid, will be determined based on the
        following relations (evaluated on the sigma-clipped times)::

            stepsize = 0.25*mintransitduration/(times.max()-times.min())

            minfreq = 1.0/endp
            maxfreq = 1.0/startp
            nfreq = int(ceil((maxfreq - minfreq)/stepsize))

        If this is False, you must set `startp`, `endp`, and `stepsize` as
        appropriate.

        If this is str == 'astropy', will use the
        `astropy.stats.BoxLeastSquares.autoperiod()` function to calculate the
        frequency grid instead of the kbls method.

        Any other value logs an error and makes this function return None.

    blsobjective : {'likelihood','snr'}
        Sets the type of objective to optimize in the `BoxLeastSquares.power()`
        function.

    blsmethod : {'fast','slow'}
        Sets the type of method to use in the `BoxLeastSquares.power()`
        function.

    blsoversample : int
        Sets the `oversample` kwarg for the `BoxLeastSquares.power()` function.

    blsmintransits : int
        Sets the `min_n_transits` kwarg for the `BoxLeastSquares.autoperiod()`
        function.

    blsfreqfactor : float
        Sets the `frequency_factor` kwarg for the `BoxLeastSquares.autoperiod()`
        function.

    periodepsilon : float
        The fractional difference between successive values of 'best' periods
        when sorting by periodogram power to consider them as separate periods
        (as opposed to part of the same periodogram peak). This is used to avoid
        broad peaks in the periodogram and make sure the 'best' periods returned
        are all actually independent.

    nbestpeaks : int
        The number of 'best' peaks to return from the periodogram results,
        starting from the global maximum of the periodogram peak values.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed using
        the number provided as the sigma-multiplier to cut out from the input
        time-series.

        If a list of two ints/floats is provided, the function will perform an
        'asymmetric' sigma-clip. The first element in this list is the sigma
        value to use for fainter flux/mag values; the second element in this
        list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    endp_timebase_check : bool
        If True, will check if the ``endp`` value is larger than the time-base
        of the observations. If it is, the minimum frequency searched is set
        to 2/time-base (i.e. the longest period searched is half of the
        time-base); the ``endp`` value recorded in the output ``kwargs`` is
        left as given. If False, will allow an ``endp`` larger than the
        time-base of the observations.

    verbose : bool
        If this is True, will indicate progress and details about the frequency
        grid used for the period search.

    nworkers : int or None
        The number of parallel workers to launch for period-search. If None
        (or 0, or larger than NCPUS), nworkers = NCPUS.

    Returns
    -------

    dict or None
        This function returns a dict, referred to as an `lspinfo` dict in other
        astrobase functions that operate on periodogram results. This is a
        standardized format across all astrobase period-finders, and is of the
        form below::

            {'bestperiod': the best period value in the periodogram,
             'bestlspval': the periodogram peak associated with the best period,
             'nbestpeaks': the input value of nbestpeaks,
             'nbestinds': indices of the best peaks in the finite-only
                          periodogram arrays,
             'nbestlspvals': nbestpeaks-size list of best period peak values,
             'nbestperiods': nbestpeaks-size list of best periods,
             'lspvals': the full array of periodogram powers,
             'frequencies': the full array of frequencies considered,
             'periods': the full array of periods considered,
             'durations': list of the 'durations' entry of each worker result,
             'blsresult': list of the 'blsresult' entry of each worker result,
             'blsmodel': list of the 'blsmodel' entry of each worker result,
             'stepsize': the actual stepsize used,
             'nfreq': the actual nfreq used,
             'mintransitduration': the input mintransitduration,
             'maxtransitduration': the input maxtransitdurations,
             'method':'bls' -> the name of the period-finder method,
             'kwargs':{ dict of all of the input kwargs for record-keeping}}

        If there are too few points after sigma-clipping, or no finite
        periodogram values, a dict with 'bestperiod' and 'bestlspval' set to
        nan and the array-valued entries set to None is returned instead (the
        exact set of keys differs slightly between these two cases).

        None is returned if `autofreq` is not a bool or the string 'astropy'.

    '''

    # get rid of nans first and sigclip
    stimes, smags, serrs = sigclip_magseries(times,
                                             mags,
                                             errs,
                                             magsarefluxes=magsarefluxes,
                                             sigclip=sigclip)

    # make sure there are enough points to calculate a spectrum
    if len(stimes) > 9 and len(smags) > 9 and len(serrs) > 9:

        # if we're setting up everything automatically
        if isinstance(autofreq, bool) and autofreq:

            # use heuristic to figure out best timestep
            stepsize = 0.25 * mintransitduration / (stimes.max() -
                                                    stimes.min())

            # now figure out the frequencies to use
            minfreq = 1.0 / endp
            maxfreq = 1.0 / startp
            nfreq = int(npceil((maxfreq - minfreq) / stepsize))

            # say what we're using
            if verbose:
                LOGINFO('min P: %s, max P: %s, nfreq: %s, '
                        'minfreq: %s, maxfreq: %s' %
                        (startp, endp, nfreq, minfreq, maxfreq))
                LOGINFO('autofreq = True: using AUTOMATIC values for '
                        'freq stepsize: %s, ndurations: %s, '
                        'min transit duration: %s, max transit duration: %s' %
                        (stepsize, ndurations, mintransitduration,
                         maxtransitduration))

            use_autoperiod = False

        elif isinstance(autofreq, bool) and not autofreq:

            # user-provided stepsize: only the grid size has to be derived
            minfreq = 1.0 / endp
            maxfreq = 1.0 / startp
            nfreq = int(npceil((maxfreq - minfreq) / stepsize))

            # say what we're using
            if verbose:
                LOGINFO('min P: %s, max P: %s, nfreq: %s, '
                        'minfreq: %s, maxfreq: %s' %
                        (startp, endp, nfreq, minfreq, maxfreq))
                LOGINFO('autofreq = False: using PROVIDED values for '
                        'freq stepsize: %s, ndurations: %s, '
                        'min transit duration: %s, max transit duration: %s' %
                        (stepsize, ndurations, mintransitduration,
                         maxtransitduration))

            use_autoperiod = False

        elif isinstance(autofreq, str) and autofreq == 'astropy':

            # the grid itself is generated further below by .autoperiod()
            use_autoperiod = True
            minfreq = 1.0 / endp
            maxfreq = 1.0 / startp

        else:

            LOGERROR("unknown autofreq kwarg encountered. can't continue...")
            return None

        # check the minimum frequency
        # NOTE(review): nfreq (computed above) is not recomputed after minfreq
        # is raised here, so the kbls grid built below keeps its original
        # length and may extend beyond maxfreq -- confirm this is intended.
        if ((minfreq < (1.0 / (stimes.max() - stimes.min())))
                and endp_timebase_check):

            LOGWARNING('the requested max P = %.3f is larger than '
                       'the time base of the observations = %.3f, '
                       ' will make minfreq = 2 x 1/timebase' %
                       (endp, stimes.max() - stimes.min()))
            minfreq = 2.0 / (stimes.max() - stimes.min())
            LOGWARNING('new minfreq: %s, maxfreq: %s' % (minfreq, maxfreq))

        #############################
        ## NOW RUN BLS IN PARALLEL ##
        #############################

        # fix number of CPUs if needed
        if not nworkers or nworkers > NCPUS:
            nworkers = NCPUS
            if verbose:
                LOGINFO('using %s workers...' % nworkers)

        # check if autoperiod is True and get the correct period-grid
        if use_autoperiod:

            # astropy's BLS requires durations in units of time
            durations = nplinspace(mintransitduration * startp,
                                   maxtransitduration * startp, ndurations)

            # set up the correct units for the BLS model
            if magsarefluxes:

                blsmodel = BoxLeastSquares(stimes * u.day,
                                           smags * u.dimensionless_unscaled,
                                           dy=serrs * u.dimensionless_unscaled)

            else:

                blsmodel = BoxLeastSquares(stimes * u.day,
                                           smags * u.mag,
                                           dy=serrs * u.mag)

            periods = nparray(
                blsmodel.autoperiod(durations * u.day,
                                    minimum_period=startp,
                                    maximum_period=endp,
                                    minimum_n_transit=blsmintransits,
                                    frequency_factor=blsfreqfactor))

            frequencies = 1.0 / periods
            nfreq = frequencies.size

            if verbose:
                LOGINFO("autofreq = 'astropy', used .autoperiod() with "
                        "minimum_n_transit = %s, freq_factor = %s "
                        "to generate the frequency grid" %
                        (blsmintransits, blsfreqfactor))
                LOGINFO('stepsize = %s, nfreq = %s, minfreq = %.5f, '
                        'maxfreq = %.5f, ndurations = %s' %
                        (abs(frequencies[1] - frequencies[0]), nfreq, 1.0 /
                         periods.max(), 1.0 / periods.min(), durations.size))

            # this model was only needed to generate the period grid
            del blsmodel
            del durations

        # otherwise, use kbls method
        else:

            frequencies = minfreq + nparange(nfreq) * stepsize

        # break up the tasks into chunks: every worker gets csint frequencies,
        # the last worker additionally takes the csrem left-over ones
        csrem = int(fmod(nfreq, nworkers))
        csint = int(float(nfreq / nworkers))
        chunk_minfreqs, chunk_nfreqs = [], []

        for x in range(nworkers):

            this_minfreqs = frequencies[x * csint]

            # handle usual nfreqs
            if x < (nworkers - 1):
                this_nfreqs = frequencies[x * csint:x * csint + csint].size
            else:
                this_nfreqs = frequencies[x * csint:x * csint + csint +
                                          csrem].size

            chunk_minfreqs.append(this_minfreqs)
            chunk_nfreqs.append(this_nfreqs)

        # populate the tasks list
        #
        # task[0] = times
        # task[1] = mags
        # task[2] = errs
        # task[3] = magsarefluxes

        # task[4] = minfreq
        # task[5] = nfreq
        # task[6] = stepsize

        # task[7] = ndurations
        # task[8] = mintransitduration
        # task[9] = maxtransitduration

        # task[10] = blsobjective
        # task[11] = blsmethod
        # task[12] = blsoversample

        # NOTE(review): in the 'astropy' autofreq case the stepsize passed to
        # the workers is still the input `stepsize` kwarg, not a value derived
        # from the autoperiod grid -- check against _parallel_bls_worker.

        # populate the tasks list
        tasks = [(stimes, smags, serrs, magsarefluxes, chunk_minf, chunk_nf,
                  stepsize, ndurations, mintransitduration, maxtransitduration,
                  blsobjective, blsmethod, blsoversample)
                 for (chunk_minf,
                      chunk_nf) in zip(chunk_minfreqs, chunk_nfreqs)]

        if verbose:
            for ind, task in enumerate(tasks):
                LOGINFO('worker %s: minfreq = %.6f, nfreqs = %s' %
                        (ind + 1, task[4], task[5]))
            LOGINFO('running...')

        # return tasks

        # start the pool
        pool = Pool(nworkers)
        results = pool.map(_parallel_bls_worker, tasks)

        pool.close()
        pool.join()
        del pool

        # now concatenate the output lsp arrays (in worker/chunk order)
        lsp = npconcatenate([x['power'] for x in results])
        periods = 1.0 / frequencies

        # find the nbestpeaks for the periodogram: 1. sort the lsp array
        # by highest value first 2. go down the values until we find
        # nbestpeaks values that are separated by at least periodepsilon in
        # period
        # make sure to get only the finite peaks in the periodogram
        # this is needed because BLS may produce infs for some peaks
        finitepeakind = npisfinite(lsp)
        finlsp = lsp[finitepeakind]
        finperiods = periods[finitepeakind]

        # make sure that finlsp has finite values before we work on it
        # (argmax raises ValueError on an empty array)
        try:

            bestperiodind = npargmax(finlsp)

        except ValueError:

            LOGERROR('no finite periodogram values '
                     'for this mag series, skipping...')

            return {
                'bestperiod': npnan,
                'bestlspval': npnan,
                'nbestpeaks': nbestpeaks,
                'nbestinds': None,
                'nbestlspvals': None,
                'nbestperiods': None,
                'lspvals': None,
                'periods': None,
                'durations': None,
                'method': 'bls',
                'blsresult': None,
                'blsmodel': None,
                'kwargs': {
                    'startp': startp,
                    'endp': endp,
                    'stepsize': stepsize,
                    'mintransitduration': mintransitduration,
                    'maxtransitduration': maxtransitduration,
                    'ndurations': ndurations,
                    'blsobjective': blsobjective,
                    'blsmethod': blsmethod,
                    'blsoversample': blsoversample,
                    'autofreq': autofreq,
                    'periodepsilon': periodepsilon,
                    'nbestpeaks': nbestpeaks,
                    'sigclip': sigclip,
                    'magsarefluxes': magsarefluxes
                }
            }

        # indices into the finite-only arrays, highest power first
        sortedlspind = npargsort(finlsp)[::-1]
        sortedlspperiods = finperiods[sortedlspind]
        sortedlspvals = finlsp[sortedlspind]

        # now get the nbestpeaks; the global maximum is always the first one
        nbestperiods, nbestlspvals, nbestinds, peakcount = ([
            finperiods[bestperiodind]
        ], [finlsp[bestperiodind]], [bestperiodind], 1)
        prevperiod = sortedlspperiods[0]

        # find the best nbestpeaks in the lsp and their periods
        for period, lspval, ind in zip(sortedlspperiods, sortedlspvals,
                                       sortedlspind):

            if peakcount == nbestpeaks:
                break
            perioddiff = abs(period - prevperiod)
            bestperiodsdiff = [abs(period - x) for x in nbestperiods]

            # this ensures that this period is different from the last
            # period and from all the other existing best periods by
            # periodepsilon to make sure we jump to an entire different
            # peak in the periodogram
            if (perioddiff > (periodepsilon * prevperiod)
                    and all(x > (periodepsilon * period)
                            for x in bestperiodsdiff)):
                nbestperiods.append(period)
                nbestlspvals.append(lspval)
                nbestinds.append(ind)
                peakcount = peakcount + 1

            prevperiod = period

        # generate the return dict
        resultdict = {
            'bestperiod': finperiods[bestperiodind],
            'bestlspval': finlsp[bestperiodind],
            'nbestpeaks': nbestpeaks,
            'nbestinds': nbestinds,
            'nbestlspvals': nbestlspvals,
            'nbestperiods': nbestperiods,
            'lspvals': lsp,
            'frequencies': frequencies,
            'periods': periods,
            'durations': [x['durations'] for x in results],
            'blsresult': [x['blsresult'] for x in results],
            'blsmodel': [x['blsmodel'] for x in results],
            'stepsize': stepsize,
            'nfreq': nfreq,
            'mintransitduration': mintransitduration,
            'maxtransitduration': maxtransitduration,
            'method': 'bls',
            'kwargs': {
                'startp': startp,
                'endp': endp,
                'stepsize': stepsize,
                'mintransitduration': mintransitduration,
                'maxtransitduration': maxtransitduration,
                'ndurations': ndurations,
                'blsobjective': blsobjective,
                'blsmethod': blsmethod,
                'blsoversample': blsoversample,
                'autofreq': autofreq,
                'periodepsilon': periodepsilon,
                'nbestpeaks': nbestpeaks,
                'sigclip': sigclip,
                'magsarefluxes': magsarefluxes
            }
        }

        return resultdict

    else:

        # fewer than 10 usable points left after sigma-clipping
        LOGERROR('no good detections for these times and mags, skipping...')
        return {
            'bestperiod': npnan,
            'bestlspval': npnan,
            'nbestinds': None,
            'nbestpeaks': nbestpeaks,
            'nbestlspvals': None,
            'nbestperiods': None,
            'lspvals': None,
            'periods': None,
            'durations': None,
            'blsresult': None,
            'blsmodel': None,
            'stepsize': stepsize,
            'nfreq': None,
            'nphasebins': None,
            'mintransitduration': mintransitduration,
            'maxtransitduration': maxtransitduration,
            'method': 'bls',
            'kwargs': {
                'startp': startp,
                'endp': endp,
                'stepsize': stepsize,
                'mintransitduration': mintransitduration,
                'maxtransitduration': maxtransitduration,
                'ndurations': ndurations,
                'blsobjective': blsobjective,
                'blsmethod': blsmethod,
                'blsoversample': blsoversample,
                'autofreq': autofreq,
                'periodepsilon': periodepsilon,
                'nbestpeaks': nbestpeaks,
                'sigclip': sigclip,
                'magsarefluxes': magsarefluxes
            }
        }
Example #10
0
def fourier_fit_magseries(
    times,
    mags,
    errs,
    period,
    fourierorder=None,
    fourierparams=None,
    fix_period=True,
    scale_errs_redchisq_unity=True,
    sigclip=3.0,
    magsarefluxes=False,
    plotfit=False,
    ignoreinitfail=True,
    verbose=True,
    curve_fit_kwargs=None,
):
    '''This fits a Fourier series to a mag/flux time series.

    The fit runs in two stages: an initial chi-sq minimization of the Fourier
    coefficients on the phased light curve, followed by a bounded
    ``curve_fit`` refinement of the period plus the coefficients against the
    unphased times.

    Parameters
    ----------

    times,mags,errs : np.array
        The input mag/flux time-series to fit a Fourier cosine series to.

    period : float
        The period to use for the Fourier fit.

    fourierorder : None or int
        If this is an int, will be interpreted as the Fourier order of the
        series to fit to the input mag/flux times-series. If this is None and
        `fourierparams` is specified, `fourierparams` will be used directly to
        generate the fit Fourier series. If `fourierparams` is also None, this
        function will try to fit a Fourier cosine series of order 3 to the
        mag/flux time-series.

    fourierparams : list of floats or None
        If this is specified as a list of floats, it must be of the form below::

            [fourier_amp1, fourier_amp2, fourier_amp3,...,fourier_ampN,
             fourier_phase1, fourier_phase2, fourier_phase3,...,fourier_phaseN]

        to specify a Fourier cosine series of order N. If this is None and
        `fourierorder` is specified, the Fourier order specified there will be
        used to construct the Fourier cosine series used to fit the input
        mag/flux time-series. If both are None, this function will try to fit a
        Fourier cosine series of order 3 to the input mag/flux time-series.

        If both `fourierorder` and `fourierparams` are given, a warning is
        logged, the supplied `fourierparams` are discarded, and the default
        order-3 initial parameters are used instead.

    fix_period : bool
        If True, will fix the period when fitting the sinusoidal function to
        the phased light curve. This is done by bounding the period to within
        1.0e-7 of the input `period` in the refinement fit.

    scale_errs_redchisq_unity : bool
        If True, the standard errors on the fit parameters will be scaled to
        make the reduced chi-sq = 1.0. This sets the ``absolute_sigma`` kwarg
        for the ``scipy.optimize.curve_fit`` function to False.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed using
        the number provided as the sigma-multiplier to cut out from the input
        time-series.

        If a list of two ints/floats is provided, the function will perform an
        'asymmetric' sigma-clip. The first element in this list is the sigma
        value to use for fainter flux/mag values; the second element in this
        list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    magsarefluxes : bool
        If True, will treat the input values of `mags` as fluxes for purposes of
        plotting the fit and sig-clipping.

    plotfit : str or False
        If this is a string, this function will make a plot for the fit to the
        mag/flux time-series and writes the plot to the path specified here.

    ignoreinitfail : bool
        If this is True, ignores the initial failure to find a set of optimized
        Fourier parameters using the global optimization function and proceeds
        to do a least-squares fit anyway.

    verbose : bool
        If True, will indicate progress and warn of any problems.

    curve_fit_kwargs : dict or None
        If not None, this should be a dict containing extra kwargs to pass to
        the scipy.optimize.curve_fit function.

    Returns
    -------

    dict
        This function returns a dict containing the model fit parameters, the
        minimized chi-sq value and the reduced chi-sq value. The form of this
        dict is mostly standardized across all functions in this module::

            {
                'fittype':'fourier',
                'fitinfo':{
                    'fourierorder': the Fourier order used for the fit,
                    'finalparams': the list of final model fit params
                                   (Fourier coeffs only, period excluded),
                    'finalparamerrs': errs for the fit period followed by
                                      the errs for each model fit param,
                    'initialfit': the result of the initial minimization,
                    'fitmags': the model fit mags,
                    'fitperiod': the fit period if this wasn't set to fixed,
                    'fitepoch': this is times.min() for this fit type,
                    'actual_fitepoch': time of minimum light from fit model
                },
                'fitchisq': the minimized value of the fit's chi-sq,
                'fitredchisq':the reduced chi-sq value,
                'fitplotfile': the output fit plot if plotfit is a str,
                'magseries':{
                    'times':input times in phase order of the model,
                    'phase':the phases of the model mags,
                    'mags':input mags/fluxes in the phase order of the model,
                    'errs':errs in the phase order of the model,
                    'magsarefluxes':input value of magsarefluxes kwarg
                }
            }

        If either fit stage fails, the same dict layout is returned with the
        'fitinfo' values (other than 'fourierorder' and 'initialfit') set to
        None and both chi-sq values set to NaN.

        NOTE: the returned value of 'fitepoch' in the 'fitinfo' dict returned by
        this function is the time value of the first observation since this is
        where the LC is folded for the fit procedure. To get the actual time of
        minimum epoch as calculated from the fit model, use the key
        'actual_fitepoch' in the 'fitinfo' dict.

    '''

    stimes, smags, serrs = sigclip_magseries(times,
                                             mags,
                                             errs,
                                             sigclip=sigclip,
                                             magsarefluxes=magsarefluxes)

    # get rid of zero errs
    nzind = npnonzero(serrs)
    stimes, smags, serrs = stimes[nzind], smags[nzind], serrs[nzind]

    phase, pmags, perrs, ptimes, mintime = get_phased_quantities(
        stimes, smags, serrs, period)

    # get the fourier order either from the scalar order kwarg...
    if fourierorder and fourierorder > 0 and not fourierparams:

        fourierparams = _default_fourier_initparams(fourierorder)

    # or from the fully specified coeffs vector
    elif not fourierorder and fourierparams:

        fourierorder = len(fourierparams) // 2

    # anything else (both given, neither given, non-positive order) falls back
    # to the default order and discards any supplied coeffs
    else:
        LOGWARNING('specified both/neither Fourier order AND Fourier coeffs, '
                   'using default Fourier order of 3')
        fourierorder = 3
        fourierparams = _default_fourier_initparams(fourierorder)

    if verbose:
        LOGINFO('fitting Fourier series of order %s to '
                'mag series with %s observations, '
                'using period %.6f, folded at %.6f' %
                (fourierorder, len(phase), period, mintime))

    # initial minimize call to find global minimum in chi-sq
    initialfit = spminimize(_fourier_chisq,
                            fourierparams,
                            args=(phase, pmags, perrs))

    # if the initial fit didn't succeed (and we're not told to ignore that),
    # we can't proceed
    if not (initialfit.success or ignoreinitfail):

        LOGERROR('initial Fourier fit did not succeed, '
                 'reason: %s, returning scipy OptimizeResult' %
                 initialfit.message)

        return _failed_fourier_fit_dict(fourierorder, initialfit, ptimes,
                                        phase, pmags, perrs, magsarefluxes)

    if verbose:
        LOGINFO('initial fit done, refining...')

    leastsqparams = initialfit.x

    # everything needed to set up the refinement fit stays inside the try so
    # that any setup problem is reported as a failed fit instead of raising
    try:

        # the refinement fits [period, amp_1..amp_N, phase_1..phase_N]
        curvefit_params = npconcatenate((nparray([period]), leastsqparams))

        # set up the bounds for the fit parameters: the period is either
        # pinned to a tiny window around the input or left free above zero,
        # while the Fourier coeffs are always unbounded
        if fix_period:
            period_lower, period_upper = period - 1.0e-7, period + 1.0e-7
        else:
            period_lower, period_upper = 0.0, npinf

        ncoeffs = 2 * fourierorder
        curvefit_bounds = ([period_lower] + [-npinf] * ncoeffs,
                           [period_upper] + [npinf] * ncoeffs)

        curvefit_func = partial(
            sinusoidal.fourier_curvefit_func,
            zerolevel=npmedian(smags),
            epoch=mintime,
            fixed_period=period if fix_period else None,
        )

        extra_kwargs = {} if curve_fit_kwargs is None else curve_fit_kwargs

        finalparams, covmatrix = curve_fit(
            curvefit_func,
            stimes,
            smags,
            p0=curvefit_params,
            sigma=serrs,
            bounds=curvefit_bounds,
            absolute_sigma=(not scale_errs_redchisq_unity),
            **extra_kwargs)

    except Exception:
        LOGEXCEPTION("curve_fit returned an exception")
        finalparams, covmatrix = None, None

    # if the leastsq fit did not succeed, return the failure dict
    if finalparams is None or covmatrix is None:

        LOGERROR(
            'fourier-fit: least-squared fit to the light curve failed')

        return _failed_fourier_fit_dict(fourierorder, initialfit, ptimes,
                                        phase, pmags, perrs, magsarefluxes)

    # this is the fit period
    fperiod = finalparams[0]

    # re-phase everything at the fit period before evaluating the model
    phase, pmags, perrs, ptimes, mintime = get_phased_quantities(
        stimes, smags, serrs, fperiod)

    # calculate the chisq and reduced chisq
    fitmags = _fourier_func(finalparams[1:], phase, pmags)

    residuals = fitmags - pmags
    fitchisq = npsum((residuals * residuals) / (perrs * perrs))

    # NOTE(review): despite the name, this is the divisor used for the reduced
    # chi-sq (number of points minus number of params). Fixing the period
    # *lowers* it by one here, which looks inverted (a fixed period is not a
    # fitted parameter). Kept numerically identical to the existing behavior;
    # confirm the intended convention before changing it.
    n_free_params = len(pmags) - len(finalparams)
    if fix_period:
        n_free_params -= 1

    fitredchisq = fitchisq / n_free_params

    # covmatrix covers [period, coeffs...], so stderrs has one more element
    # than the 'finalparams' value returned below
    stderrs = npsqrt(npdiag(covmatrix))

    if verbose:
        LOGINFO('final fit done. chisq = %.5f, reduced chisq = %.5f' %
                (fitchisq, fitredchisq))

    # figure out the time of light curve minimum (i.e. the fit epoch)
    # this is when the fit mag is maximum (i.e. the faintest)
    # or if magsarefluxes = True, then this is when fit flux is minimum
    if not magsarefluxes:
        fitmagminind = npwhere(fitmags == npmax(fitmags))
    else:
        fitmagminind = npwhere(fitmags == npmin(fitmags))

    # several samples can tie for the extremum; keep only the first one
    if len(fitmagminind[0]) > 1:
        fitmagminind = (fitmagminind[0][0], )

    # assemble the returndict
    returndict = {
        'fittype': 'fourier',
        'fitinfo': {
            'fourierorder': fourierorder,
            # return coeffs only for backwards compatibility with
            # existing functions that use the returned value of
            # fourier_fit_magseries
            'finalparams': finalparams[1:],
            'finalparamerrs': stderrs,
            'initialfit': initialfit,
            'fitmags': fitmags,
            'fitperiod': finalparams[0],
            # the 'fitepoch' is just the minimum time here
            'fitepoch': mintime,
            # the actual fit epoch is calculated as the time of minimum
            # light OF the fit model light curve
            'actual_fitepoch': ptimes[fitmagminind]
        },
        'fitchisq': fitchisq,
        'fitredchisq': fitredchisq,
        'fitplotfile': None,
        'magseries': {
            'times': ptimes,
            'phase': phase,
            'mags': pmags,
            'errs': perrs,
            'magsarefluxes': magsarefluxes
        },
    }

    # make the fit plot if required
    if plotfit and isinstance(plotfit, str):

        make_fit_plot(phase,
                      pmags,
                      perrs,
                      fitmags,
                      fperiod,
                      mintime,
                      mintime,
                      plotfit,
                      magsarefluxes=magsarefluxes)

        returndict['fitplotfile'] = plotfit

    return returndict


def _default_fourier_initparams(order):
    '''Return the initial [amps..., phases...] guess for a given order.'''

    amplitudes = [0.6] + [0.2] * (order - 1)
    phases = [0.1] * order
    return amplitudes + phases


def _failed_fourier_fit_dict(fourierorder, initialfit, ptimes, phase, pmags,
                             perrs, magsarefluxes):
    '''Build the dict returned when either stage of the Fourier fit fails.'''

    return {
        'fittype': 'fourier',
        'fitinfo': {
            'fourierorder': fourierorder,
            'finalparams': None,
            'finalparamerrs': None,
            'initialfit': initialfit,
            'fitmags': None,
            'fitperiod': None,
            'fitepoch': None,
            'actual_fitepoch': None,
        },
        'fitchisq': npnan,
        'fitredchisq': npnan,
        'fitplotfile': None,
        'magseries': {
            'times': ptimes,
            'phase': phase,
            'mags': pmags,
            'errs': perrs,
            'magsarefluxes': magsarefluxes
        }
    }
Example #11
0
def spline_fit_magseries(times,
                         mags,
                         errs,
                         period,
                         knotfraction=0.01,
                         maxknots=30,
                         sigclip=30.0,
                         plotfit=False,
                         ignoreinitfail=False,
                         magsarefluxes=False,
                         verbose=True):
    '''This fits a univariate cubic spline to the phased light curve.

    This fit may be better than the Fourier fit for sharply variable objects,
    like EBs, so can be used to distinguish them from other types of variables.

    Parameters
    ----------

    times,mags,errs : np.array
        The input mag/flux time-series to fit a spline to. If `errs` is None,
        a constant error of 0.005 is used for every point.

    period : float
        The period to use to phase the light curve for the spline fit.

    knotfraction : float
        The number of internal knots is ``floor(knotfraction * nobs)``, where
        nobs is the number of phased observations. A value of 0.01 (or 1%) of
        the total number of non-nan observations appears to work quite well,
        without over-fitting.

    maxknots : int
        The maximum number of knots that will be allowed.

    sigclip : float or int or sequence of two floats/ints or None
        Passed through to `sigclip_magseries` to sigma-clip the input.

    plotfit : str or False
        If this is a string, a plot of the fit is written to this path.

    ignoreinitfail : bool
        Not used by this function.

    magsarefluxes : bool
        Set this to True if `mags` are flux units (i.e. normalized to 1). This
        decides whether the epoch of minimum light is taken at the maximum
        (mags) or the minimum (fluxes) of the fit, and is passed on to the
        sigma-clipping and plotting functions.

    verbose : bool
        If True, logs a summary of the fit.

    Returns
    -------

    dict
        A dict with the keys 'fittype', 'fitinfo' (containing 'nknots',
        'fitmags', 'fitepoch'), 'fitchisq', 'fitredchisq', 'fitplotfile', and
        'magseries' (containing 'times', 'phase', 'mags', 'errs',
        'magsarefluxes'). All arrays in 'magseries' are in phase order and
        have had points with non-increasing phase removed.

        The reduced chisq is calculated as::

            reduced_chisq = fit_chisq/(len(pmags) - len(knots) - 1)

    '''

    # this is required to fit the spline correctly
    if errs is None:
        errs = npfull_like(mags, 0.005)

    # sigclip the magnitude time series
    stimes, smags, serrs = sigclip_magseries(times,
                                             mags,
                                             errs,
                                             sigclip=sigclip,
                                             magsarefluxes=magsarefluxes)
    # get rid of zero errs
    nzind = npnonzero(serrs)
    stimes, smags, serrs = stimes[nzind], smags[nzind], serrs[nzind]

    # phase the mag series
    phase, pmags, perrs, ptimes, mintime = (_get_phased_quantities(
        stimes, smags, serrs, period))

    # now figure out the number of knots, capped at maxknots
    nobs = len(phase)
    nknots = int(npfloor(knotfraction * nobs))
    nknots = maxknots if nknots > maxknots else nknots
    splineknots = nplinspace(phase[0] + 0.01, phase[-1] - 0.01, num=nknots)

    # NOTE: newer scipy needs x to be strictly increasing. this means we should
    # filter out anything that doesn't have np.diff(phase) > 0.0
    # FIXME: this needs to be tested
    phase_diffs_ind = npdiff(phase) > 0.0
    incphase_ind = npconcatenate((nparray([True]), phase_diffs_ind))

    # ptimes must be filtered together with the other phased arrays: it is
    # indexed below with positions taken from fitmags, and is returned
    # alongside phase/mags/errs (assumes ptimes is aligned element-for-element
    # with phase, as that indexing requires)
    phase, pmags, perrs, ptimes = (phase[incphase_ind], pmags[incphase_ind],
                                   perrs[incphase_ind], ptimes[incphase_ind])

    # generate and fit the spline
    spl = LSQUnivariateSpline(phase, pmags, t=splineknots, w=1.0 / perrs)

    # calculate the spline fit to the actual phases, the chisq and red-chisq
    fitmags = spl(phase)

    fitchisq = npsum(((fitmags - pmags) * (fitmags - pmags)) / (perrs * perrs))

    # FIXME (carried over): check that this is the right number of degrees of
    # freedom for the reduced chisq
    fitredchisq = fitchisq / (len(pmags) - nknots - 1)

    if verbose:
        LOGINFO('spline fit done. nknots = %s,  '
                'chisq = %.5f, reduced chisq = %.5f' %
                (nknots, fitchisq, fitredchisq))

    # figure out the time of light curve minimum (i.e. the fit epoch)
    # this is when the fit mag is maximum (i.e. the faintest)
    # or if magsarefluxes = True, then this is when fit flux is minimum
    if not magsarefluxes:
        fitmagminind = npwhere(fitmags == npmax(fitmags))
    else:
        fitmagminind = npwhere(fitmags == npmin(fitmags))

    # several samples can tie for the extremum; keep only the first one so
    # that the fit epoch is a single time
    if len(fitmagminind[0]) > 1:
        fitmagminind = (fitmagminind[0][0], )
    magseriesepoch = ptimes[fitmagminind]

    # assemble the returndict
    returndict = {
        'fittype': 'spline',
        'fitinfo': {
            'nknots': nknots,
            'fitmags': fitmags,
            'fitepoch': magseriesepoch
        },
        'fitchisq': fitchisq,
        'fitredchisq': fitredchisq,
        'fitplotfile': None,
        'magseries': {
            'times': ptimes,
            'phase': phase,
            'mags': pmags,
            'errs': perrs,
            'magsarefluxes': magsarefluxes
        },
    }

    # make the fit plot if required
    if plotfit and isinstance(plotfit, str):

        _make_fit_plot(phase,
                       pmags,
                       perrs,
                       fitmags,
                       period,
                       mintime,
                       magseriesepoch,
                       plotfit,
                       magsarefluxes=magsarefluxes)

        returndict['fitplotfile'] = plotfit

    return returndict
Example #12
0
def test_concatenate():
    """Check that `concatenate` agrees with NumPy's concatenate.

    A (2, 3, 4) array is joined with itself using both implementations, and
    the `toarray()` form of this module's result is compared to NumPy's.
    """
    from numpy import concatenate as npconcatenate

    # presumably `arange` here is NumPy's; verify against the file's imports
    sample = arange(2 * 3 * 4).reshape((2, 3, 4))
    pair = (sample, sample)

    joined = concatenate(pair)
    expected = npconcatenate(pair)

    assert allclose(expected, joined.toarray())
Example #13
0
def spline_fit_magseries(times, mags, errs, period,
                         knotfraction=0.01,
                         maxknots=30,
                         sigclip=30.0,
                         plotfit=False,
                         ignoreinitfail=False,
                         magsarefluxes=False,
                         verbose=True):

    '''This fits a univariate cubic spline to the phased light curve.

    This fit may be better than the Fourier fit for sharply variable objects,
    like EBs, so can be used to distinguish them from other types of variables.

    Parameters
    ----------

    times,mags,errs : np.array
        The input mag/flux time-series to fit a spline to. If `errs` is None,
        a constant error of 0.005 is used for every point.

    period : float
        The period to use for the spline fit.

    knotfraction : float
        The knot fraction is the number of internal knots to use for the
        spline. A value of 0.01 (or 1%) of the total number of non-nan
        observations appears to work quite well, without over-fitting. maxknots
        controls the maximum number of knots that will be allowed.

    maxknots : int
        The maximum number of knots that will be used even if `knotfraction`
        gives a value to use larger than `maxknots`. This helps dealing with
        over-fitting to short time-scale variations.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed using
        the number provided as the sigma-multiplier to cut out from the input
        time-series.

        If a list of two ints/floats is provided, the function will perform an
        'asymmetric' sigma-clip. The first element in this list is the sigma
        value to use for fainter flux/mag values; the second element in this
        list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    magsarefluxes : bool
        If True, will treat the input values of `mags` as fluxes for purposes of
        plotting the fit and sig-clipping.

    plotfit : str or False
        If this is a string, this function will make a plot for the fit to the
        mag/flux time-series and writes the plot to the path specified here.

    ignoreinitfail : bool
        Not used by this function; nothing in the spline fit reads it.

    verbose : bool
        If True, will indicate progress and warn of any problems.

    Returns
    -------

    dict
        This function returns a dict containing the model fit parameters, the
        minimized chi-sq value and the reduced chi-sq value. The form of this
        dict is mostly standardized across all functions in this module::

            {
                'fittype':'spline',
                'fitinfo':{
                    'nknots': the number of knots used for the fit
                    'fitmags': the model fit mags,
                    'fitepoch': the epoch of minimum light for the fit,
                },
                'fitchisq': the minimized value of the fit's chi-sq,
                'fitredchisq':the reduced chi-sq value,
                'fitplotfile': the output fit plot if plotfit is a str,
                'magseries':{
                    'times':input times in phase order of the model,
                    'phase':the phases of the model mags,
                    'mags':input mags/fluxes in the phase order of the model,
                    'errs':errs in the phase order of the model,
                    'magsarefluxes':input value of magsarefluxes kwarg
                }
            }

        The arrays under 'magseries' have had any points with non-increasing
        phase removed, so they all have the same length as 'fitmags'.

    '''

    # this is required to fit the spline correctly
    if errs is None:
        errs = npfull_like(mags, 0.005)

    # sigclip the magnitude time series
    stimes, smags, serrs = sigclip_magseries(times, mags, errs,
                                             sigclip=sigclip,
                                             magsarefluxes=magsarefluxes)
    # get rid of zero errs
    nzind = npnonzero(serrs)
    stimes, smags, serrs = stimes[nzind], smags[nzind], serrs[nzind]

    # phase the mag series
    phase, pmags, perrs, ptimes, mintime = (
        get_phased_quantities(stimes, smags, serrs, period)
    )

    # now figure out the number of knots, capped at maxknots
    nobs = len(phase)
    nknots = int(npfloor(knotfraction*nobs))
    nknots = maxknots if nknots > maxknots else nknots
    splineknots = nplinspace(phase[0] + 0.01,
                             phase[-1] - 0.01,
                             num=nknots)

    # NOTE: newer scipy needs x to be strictly increasing. this means we should
    # filter out anything that doesn't have np.diff(phase) > 0.0
    # FIXME: this needs to be tested
    phase_diffs_ind = npdiff(phase) > 0.0
    incphase_ind = npconcatenate((nparray([True]), phase_diffs_ind))

    # apply the same filter to ptimes: it is indexed below with positions
    # found in fitmags and returned next to phase/mags/errs, so it has to stay
    # aligned with them (assumes ptimes comes back aligned element-for-element
    # with phase, as that indexing requires)
    phase, pmags, perrs, ptimes = (phase[incphase_ind],
                                   pmags[incphase_ind],
                                   perrs[incphase_ind],
                                   ptimes[incphase_ind])

    # generate and fit the spline
    spl = LSQUnivariateSpline(phase, pmags, t=splineknots, w=1.0/perrs)

    # calculate the spline fit to the actual phases, the chisq and red-chisq
    fitmags = spl(phase)

    fitchisq = npsum(
        ((fitmags - pmags)*(fitmags - pmags)) / (perrs*perrs)
    )

    fitredchisq = fitchisq/(len(pmags) - nknots - 1)

    if verbose:
        LOGINFO(
            'spline fit done. nknots = %s,  '
            'chisq = %.5f, reduced chisq = %.5f' %
            (nknots, fitchisq, fitredchisq)
        )

    # figure out the time of light curve minimum (i.e. the fit epoch)
    # this is when the fit mag is maximum (i.e. the faintest)
    # or if magsarefluxes = True, then this is when fit flux is minimum
    if not magsarefluxes:
        fitmagminind = npwhere(fitmags == npmax(fitmags))
    else:
        fitmagminind = npwhere(fitmags == npmin(fitmags))

    # several samples can tie for the extremum; keep only the first one
    if len(fitmagminind[0]) > 1:
        fitmagminind = (fitmagminind[0][0],)
    magseriesepoch = ptimes[fitmagminind]

    # assemble the returndict
    returndict = {
        'fittype':'spline',
        'fitinfo':{
            'nknots':nknots,
            'fitmags':fitmags,
            'fitepoch':magseriesepoch
        },
        'fitchisq':fitchisq,
        'fitredchisq':fitredchisq,
        'fitplotfile':None,
        'magseries':{
            'times':ptimes,
            'phase':phase,
            'mags':pmags,
            'errs':perrs,
            'magsarefluxes':magsarefluxes
        },
    }

    # make the fit plot if required
    if plotfit and isinstance(plotfit, str):

        make_fit_plot(phase, pmags, perrs, fitmags,
                      period, mintime, magseriesepoch,
                      plotfit,
                      magsarefluxes=magsarefluxes)

        returndict['fitplotfile'] = plotfit

    return returndict
    def fit_diffeomorphism_model(self,
                                 X,
                                 t,
                                 X_d,
                                 learning_rate=1e-2,
                                 learning_decay=0.95,
                                 n_epochs=50,
                                 train_frac=0.8,
                                 l2=1e1,
                                 batch_size=64,
                                 initialize=True,
                                 verbose=True,
                                 X_val=None,
                                 t_val=None,
                                 Xd_val=None):
        """fit_diffeomorphism_model 
        
        Arguments:
            X {numpy array [Ntraj,Nt,Ns]} -- state
            t {numpy array [Ntraj,Nt]} -- time vector
            X_d {numpy array [Ntraj,Nt,Ns]} -- desired state
        
        Keyword Arguments:
            learning_rate {[type]} --  (default: {1e-2})
            learning_decay {float} --  (default: {0.95})
            n_epochs {int} --  (default: {50})
            train_frac {float} -- ratio of training and testing (default: {0.8})
            l2 {[type]} -- L2 penalty term (default: {1e1})
            jacobian_penalty {[type]} --  (default: {1.})
            batch_size {int} --  (default: {64})
            initialize {bool} -- flag to warm start (default: {True})
            verbose {bool} --  (default: {True})
            X_val {numpy array [Ntraj,Nt,Ns]} -- state in validation set (default: {None})
            t_val {numpy array [Ntraj,Nt]} -- time in validation set (default: {None})
            Xd_val {numpy array [Ntraj,Nt,Ns]} -- desired state in validation set (default: {None})
        
        Returns:
            float -- val_losses[-1]
        """
        device = 'cuda' if cuda.is_available() else 'cpu'
        X, X_dot, X_d, X_d_dot, t = self.process(X=X, t=t, X_d=X_d)

        # Prepare data for pytorch:
        manual_seed(42)  # Fix seed for reproducibility
        if self.traj_input:
            X_tensor = from_numpy(
                npconcatenate(
                    (X, X_d, X_dot, X_d_dot, np.zeros_like(X)),
                    axis=1))  #[x (1,n), x_d (1,n), x_dot (1,n), zeros (1,n)]
        else:
            X_tensor = from_numpy(
                npconcatenate(
                    (X, X_dot, np.zeros_like(X)),
                    axis=1))  # [x (1,n), x_d (1,n), x_dot (1,n), zeros (1,n)]
        y_target = X_dot - (dot(self.A_cl, X.T) + dot(self.BK, X_d.T)).T
        y_tensor = from_numpy(y_target)
        X_tensor.requires_grad_(True)

        # Builds dataset with all data
        dataset = TensorDataset(X_tensor, y_tensor)

        if X_val is None or t_val is None or Xd_val is None:
            # Splits randomly into train and validation datasets
            n_train = int(train_frac * X.shape[0])
            n_val = X.shape[0] - n_train
            train_dataset, val_dataset = random_split(dataset,
                                                      [n_train, n_val])
            # Builds a loader for each dataset to perform mini-batch gradient descent
            train_loader = DataLoader(dataset=train_dataset,
                                      batch_size=batch_size,
                                      shuffle=True)
            val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size)
        else:
            #Uses X,... as training data and X_val,... as validation data
            X_val, X_dot_val, Xd_val, Xd_dot_val, t_val = self.process(
                X=X_val, t=t_val, X_d=Xd_val)
            if self.traj_input:
                X_val_tensor = from_numpy(
                    npconcatenate((X_val, Xd_val, X_dot_val, Xd_dot_val,
                                   np.zeros_like(X_val)),
                                  axis=1)
                )  #[x (1,n), x_d (1,n), x_dot (1,n), zeros (1,n)]
            else:
                X_val_tensor = from_numpy(
                    npconcatenate(
                        (X_val, X_dot_val, np.zeros_like(X_val)),
                        axis=1))  # [x (1,n), x_dot (1,n), zeros (1,n)]
            y_target_val = X_dot_val - dot(self.A_cl,
                                           X_val.T + dot(self.BK, Xd_val.T)).T
            y_val_tensor = from_numpy(y_target_val)
            X_val_tensor.requires_grad_(True)
            val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
            # Builds a loader for each dataset to perform mini-batch gradient descent
            train_loader = DataLoader(dataset=dataset,
                                      batch_size=int(batch_size),
                                      shuffle=True)
            val_loader = DataLoader(dataset=val_dataset,
                                    batch_size=int(batch_size))

        # Set up optimizer and learning rate scheduler:
        optimizer = optim.Adam(self.diffeomorphism_model.parameters(),
                               lr=learning_rate,
                               weight_decay=l2)
        lambda1 = lambda epoch: learning_decay**epoch
        scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)

        def make_train_step(model, loss_fn, optimizer):
            def train_step(x, y):
                model.train()  # Set model to training mode
                y_pred = model(x)
                loss = loss_fn(y, y_pred, model.training)
                loss.backward()
                optimizer.step()
                return loss.item()

            return train_step

        batch_loss = []
        losses = []
        batch_val_loss = []
        val_losses = []
        train_step = make_train_step(
            self.diffeomorphism_model,
            self.diffeomorphism_model.diffeomorphism_loss, optimizer)

        # Initialize model weights:
        def init_normal(m):
            if type(m) == nn.Linear:
                nn.init.xavier_normal_(m.weight)

        if initialize:
            self.diffeomorphism_model.apply(init_normal)

        # Training loop
        for i in range(n_epochs):
            # Uses loader to fetch one mini-batch for training
            #print('Training epoch ', i)
            for x_batch, y_batch in train_loader:
                # Send mini batch data to same location as model:
                x_batch = x_batch.to(device)
                y_batch = y_batch.to(device)
                #print('Training: ', x_batch.shape, y_batch.shape)
                # Train based on current batch:
                batch_loss.append(train_step(x_batch, y_batch))
                optimizer.zero_grad()
            losses.append(sum(batch_loss) / len(batch_loss))
            batch_loss = []

            #print('Validating epoch ', i)
            with no_grad():
                for x_val, y_val in val_loader:
                    # Sends data to same device as model
                    x_val = x_val.to(device)
                    y_val = y_val.to(device)

                    #print('Validation: ', x_val.shape, y_val.shape)

                    self.diffeomorphism_model.eval(
                    )  # Change model model to evaluation
                    #xt_val = x_val[:, :2*self.n]  # [x, x_d]
                    #xdot_val = x_val[:, 2*self.n:]  # [xdot]
                    y_pred = self.diffeomorphism_model(x_val)  # Predict
                    #jacobian_xdot_val, zero_jacobian_val = calc_gradients(xt_val, xdot_val, yhat, None, None, self.diffeomorphism_model.training)
                    batch_val_loss.append(
                        float(
                            self.diffeomorphism_model.diffeomorphism_loss(
                                y_val, y_pred, self.diffeomorphism_model.
                                training)))  # Compute validation loss
                val_losses.append(sum(batch_val_loss) /
                                  len(batch_val_loss))  # Save validation loss
                batch_val_loss = []

            scheduler.step(i)
            if verbose:
                print(' - Epoch: ', i, ' Training loss:',
                      format(losses[-1], '08f'), ' Validation loss:',
                      format(val_losses[-1], '08f'))
                print(
                    'Improvement metric (for early stopping): ',
                    sum(
                        abs(
                            array(val_losses[-min(3, len(val_losses)):]) -
                            val_losses[-1])) /
                    (3 * val_losses[-min(3, len(val_losses))]))
            if i > n_epochs / 4 and sum(
                    abs(
                        array(val_losses[-min(3, len(val_losses)):]) -
                        val_losses[-1])) / (
                            3 * val_losses[-min(3, len(val_losses))]) < 0.01:
                #print('Early stopping activated')
                break

        return val_losses[-1]
Example #15
0
def macf_period_find(
        times,
        mags,
        errs,
        fillgaps=0.0,
        filterwindow=11,
        forcetimebin=None,
        maxlags=None,
        maxacfpeaks=10,
        smoothacf=21,  # set for Kepler-type LCs, see details below
        smoothfunc=_smooth_acf_savgol,
        smoothfunckwargs=None,
        magsarefluxes=False,
        sigclip=3.0,
        verbose=True,
        periodepsilon=0.1,  # doesn't do anything, for consistent external API
        nworkers=None,  # doesn't do anything, for consistent external API
        startp=None,  # doesn't do anything, for consistent external API
        endp=None,  # doesn't do anything, for consistent external API
        autofreq=None,  # doesn't do anything, for consistent external API
        stepsize=None,  # doesn't do anything, for consistent external API
):
    '''This finds periods using the McQuillan+ (2013a, 2014) ACF method.

    The kwargs from `periodepsilon` to `stepsize` don't do anything but are used
    to present a consistent API for all periodbase period-finders to an outside
    driver (e.g. the one in the checkplotserver).

    Parameters
    ----------

    times,mags,errs : np.array
        The input magnitude/flux time-series to run the period-finding for.

    fillgaps : 'noiselevel' or float
        This sets what to use to fill in gaps in the time series. If this is
        'noiselevel', will smooth the light curve using a point window size of
        `filterwindow` (this should be an odd integer), subtract the smoothed LC
        from the actual LC and estimate the RMS. This RMS will be used to fill
        in the gaps. Other useful values here are 0.0, and npnan.

    filterwindow : int
        The light curve's smoothing filter window size to use if
        `fillgaps='noiselevel'`.

    forcetimebin : None or float
        This is used to force a particular cadence in the light curve other than
        the automatically determined cadence. This effectively rebins the light
        curve to this cadence. This should be in the same time units as `times`.

    maxlags : None or int
        This is the maximum number of lags to calculate. If None, will calculate
        all lags.

    maxacfpeaks : int
        This is the maximum number of ACF peaks to use when finding the highest
        peak and obtaining a fit period.

    smoothacf : int or None
        This is the number of points to use as the window size when smoothing
        the ACF with the `smoothfunc`. This should be an odd integer value. If
        this is None, will not smooth the ACF, but this will probably lead to
        finding spurious peaks in a generally noisy ACF.

        For Kepler, a value between 21 and 51 seems to work fine. For ground
        based data, much larger values may be necessary: between 1001 and 2001
        seem to work best for the HAT surveys. This is dependent on cadence, RMS
        of the light curve, the periods of the objects you're looking for, and
        finally, any correlated noise in the light curve. Make a plot of the
        smoothed/unsmoothed ACF vs. lag using the result dict of this function
        and the `plot_acf_results` function above to see the identified ACF
        peaks and what kind of smoothing might be needed.

        The value of `smoothacf` will also be used to figure out the interval to
        use when searching for local peaks in the ACF: this interval is 1/2 of
        the `smoothacf` value, and never less than 1. If `smoothacf` is None (or
        otherwise not usable as a number), an interval of 1 is used.

    smoothfunc : Python function
        This is the function that will be used to smooth the ACF. This should
        take at least one kwarg: 'windowsize'. Other kwargs can be passed in
        using a dict provided in `smoothfunckwargs`. By default, this uses a
        Savitzky-Golay filter, a Gaussian filter is also provided but not
        used. Another good option would be an actual low-pass filter (generated
        using scipy.signal?) to remove all high frequency noise from the ACF.

    smoothfunckwargs : dict or None
        The dict of optional kwargs to pass in to the `smoothfunc`.

    magsarefluxes : bool
        If your input measurements in `mags` are actually fluxes instead of
        mags, set this to True.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed using
        the number provided as the sigma-multiplier to cut out from the input
        time-series.

        If a list of two ints/floats is provided, the function will perform an
        'asymmetric' sigma-clip. The first element in this list is the sigma
        value to use for fainter flux/mag values; the second element in this
        list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    verbose : bool
        If True, will indicate progress and report errors.

    Returns
    -------

    dict
        Returns a dict with results. dict['bestperiod'] is the estimated best
        period and dict['fitperiodrms'] is its estimated error. Other
        interesting things in the output include:

        - dict['acfresults']: all results from calculating the ACF. in
          particular, the unsmoothed ACF might be of interest:
          dict['acfresults']['acf'] and dict['acfresults']['lags'].

        - dict['lags'] and dict['acf'] contain the ACF after smoothing was
          applied.

        - dict['periods'] and dict['lspvals'] can be used to construct a
          pseudo-periodogram.

        - dict['naivebestperiod'] is obtained by multiplying the lag at the
          highest ACF peak with the cadence. This is usually close to the fit
          period (dict['fitbestperiod']), which is calculated by doing a fit to
          the lags vs. peak index relation as in McQuillan+ 2014.

        If the linear fit fails, a warning is logged, the fit-related items
        ('fitbestperiod', 'fitperiodrms', 'periodfitcoeffs',
        'periodfitcovar') are filled with NaN and dict['bestperiod'] falls back
        to dict['naivebestperiod'].

    '''

    # get the ACF
    acfres = autocorr_magseries(times,
                                mags,
                                errs,
                                maxlags=maxlags,
                                fillgaps=fillgaps,
                                forcetimebin=forcetimebin,
                                sigclip=sigclip,
                                magsarefluxes=magsarefluxes,
                                filterwindow=filterwindow,
                                verbose=verbose)

    xlags = acfres['lags']

    # smooth the ACF if requested
    if smoothacf and isinstance(smoothacf, int) and smoothacf > 0:

        if smoothfunckwargs is None:
            sfkwargs = {'windowsize': smoothacf}
        else:
            # copy so the caller's dict is not modified
            sfkwargs = smoothfunckwargs.copy()
            sfkwargs.update({'windowsize': smoothacf})

        xacf = smoothfunc(acfres['acf'], **sfkwargs)

    else:

        # no smoothing applied; sfkwargs must still be bound because it is
        # reported back in the 'kwargs' item of the result dict
        sfkwargs = smoothfunckwargs
        xacf = acfres['acf']

    # the peak search interval is half the smoothing window. smoothacf may be
    # None (smoothing disabled), so fall back to 1 instead of failing on the
    # division, and never go below 1.
    # NOTE(review): assumes _get_acf_peakheights needs searchinterval >= 1 --
    # confirm against that helper
    try:
        searchinterval = max(1, int(smoothacf / 2))
    except (TypeError, ValueError):
        searchinterval = 1

    # get the relative peak heights and fit best lag
    peakres = _get_acf_peakheights(xlags,
                                   xacf,
                                   npeaks=maxacfpeaks,
                                   searchinterval=searchinterval)

    # this is the best period's best ACF peak height
    bestlspval = peakres['bestpeakheight']

    try:

        # get the fit best lag from a linear fit to the peak index vs time(peak
        # lag) function as in McQuillan+ (2014)
        later_peaks = peakres['relpeaklags'][
            peakres['relpeaklags'] > peakres['bestlag']
        ]
        fity = npconcatenate(([0.0, peakres['bestlag']], later_peaks))
        fity = fity * acfres['cadence']
        fitx = nparange(fity.size)

        fitcoeffs, fitcovar = nppolyfit(fitx, fity, 1, cov=True)

        # fit best period is the gradient of fit
        fitbestperiod = fitcoeffs[0]
        bestperiodrms = npsqrt(fitcovar[0, 0])  # from the covariance matrix

    except Exception:

        # the fit is best-effort (it can fail e.g. when too few peaks were
        # found): warn, mark the fit results as NaN and carry on so the naive
        # period below is used instead of aborting the whole run
        LOGWARNING('linear fit to time at each peak lag '
                   'value vs. peak number failed, '
                   'naively calculated ACF period may not be accurate')
        fitcoeffs = nparray([npnan, npnan])
        fitcovar = nparray([[npnan, npnan], [npnan, npnan]])
        fitbestperiod = npnan
        bestperiodrms = npnan

    # calculate the naive best period using delta_tau = lag * cadence
    naivebestperiod = peakres['bestlag'] * acfres['cadence']

    # comparison is False for a NaN fit period, so no alias warning then
    if fitbestperiod < naivebestperiod:
        LOGWARNING('fit bestperiod = %.5f may be an alias, '
                   'naively calculated bestperiod is = %.5f' %
                   (fitbestperiod, naivebestperiod))

    if npisfinite(fitbestperiod):
        bestperiod = fitbestperiod
    else:
        bestperiod = naivebestperiod

    return {
        'bestperiod': bestperiod,
        'bestlspval': bestlspval,
        'nbestpeaks': maxacfpeaks,
        # for compliance with the common pfmethod API
        'nbestperiods': npconcatenate([
            [fitbestperiod],
            peakres['relpeaklags'][1:maxacfpeaks] * acfres['cadence']
        ]),
        'nbestlspvals': peakres['maxacfs'][:maxacfpeaks],
        'lspvals': xacf,
        'periods': xlags * acfres['cadence'],
        'acf': xacf,
        'lags': xlags,
        'method': 'acf',
        'naivebestperiod': naivebestperiod,
        'fitbestperiod': fitbestperiod,
        'fitperiodrms': bestperiodrms,
        'periodfitcoeffs': fitcoeffs,
        'periodfitcovar': fitcovar,
        'kwargs': {
            'maxlags': maxlags,
            'maxacfpeaks': maxacfpeaks,
            'fillgaps': fillgaps,
            'filterwindow': filterwindow,
            'smoothacf': smoothacf,
            'smoothfunckwargs': sfkwargs,
            'magsarefluxes': magsarefluxes,
            'sigclip': sigclip
        },
        'acfresults': acfres,
        'acfpeaks': peakres
    }
Example #16
0
 def combine_raw_data(self):
     '''
     Combine all raw ``*.txt`` csv files found in ``self.inputdir`` into the
     single output variable ``self.data``.

     ``self.data`` is a dict mapping each (whitespace-stripped) csv column
     name to a list of values converted with ``utils.fitem``. If it does not
     exist yet, it is created from the first row of the first non-empty
     file. Columns that end up shorter than the ``'DateUTC'`` column are
     padded with empty strings so all lists have equal length. Finally the
     data is sorted by time if ``self.verify_sorting()`` reports it is not.

     Raises
     ------
     IOError
         If no ``*.txt`` file is found in ``self.inputdir``.
     '''
     # get a list of all txt files in inputdir, sorted by filename
     filelist = sorted(glob.glob(os.path.join(self.inputdir, '*.txt')))
     if not filelist:
         raise IOError('No files found in ' + self.inputdir)
     for inputfile in filelist:
         with open(inputfile, 'r') as csvin:
             reader = csv.DictReader(csvin, delimiter=',')
             if not hasattr(self, 'data'):
                 # first usable file: seed self.data from its first row.
                 # next(reader) works on both Python 2 and 3, whereas
                 # reader.next() only exists on Python 2.
                 try:
                     first_row = next(reader)
                 except StopIteration:
                     # file has no data rows; try again with the next file
                     pass
                 else:
                     self.data = {k.strip(): [utils.fitem(v)] for k, v in
                                  first_row.items() if k is not None}
             for current_row, line in enumerate(reader, 1):
                 if current_row == 1:
                     # NOTE(review): DictReader has already consumed the
                     # header line, so this skips the first row returned by
                     # the loop -- presumably the input repeats its header;
                     # confirm against the raw files
                     continue
                 if line['Time'] == '<br>':
                     # not a valid csv line, so skip
                     continue
                 try:
                     datetime.strptime(line[self.dateUTCstring],
                                       '%Y-%m-%d %H:%M:%S')
                 except (ValueError, TypeError):
                     # Not a valid csv line (unparsable or missing date),
                     # so skip
                     continue
                 lenDateUTC = len(self.data['DateUTC'])
                 for k, v in line.items():
                     if k is None:  # skip over empty fields
                         continue
                     # self.data keys are stored stripped, so strip before
                     # the lookup; otherwise padded header names never match
                     k = k.strip()
                     if k not in self.data:
                         continue
                     addnones = lenDateUTC - len(self.data[k])
                     if addnones > 0:
                         # pad in place with plain list operations so the
                         # values already stored keep their types
                         self.data[k].extend([''] * addnones)
                     self.data[k].append(utils.fitem(v))
     # check if we need to add empty values at the end of the lists
     lenDateUTC = len(self.data['DateUTC'])
     for values in self.data.values():
         addnones = lenDateUTC - len(values)
         if addnones > 0:
             values.extend([''] * addnones)
     # verify that everything is sorted with time
     if not self.verify_sorting():
         # sort data if needed according to time
         self.sort_data()