Example #1
    def autocorrelation(self, inputData, nMax):
        # Requires numpy as np, math, and emcee.autocorr's
        # function_1d / integrated_time in the surrounding module.
        predictions = self.predict(inputData, n=1)
        output = np.squeeze(np.array(predictions)).T

        valFunc = 0
        accepted = 0

        for x in range(len(output)):
            # quiet=True: warn instead of raising when the series is too short
            tau = integrated_time(output[x], tol=5, quiet=True)
            # integrated_time returns a length-1 array for 1-D input
            if not math.isnan(float(tau[0])):
                valFunc += function_1d(output[x])
                accepted += 1

        # Note: this raises ZeroDivisionError if no series passed the check.
        valFunc = valFunc / accepted
        if nMax < len(valFunc):
            valFunc = valFunc[:nMax]

        return valFunc
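Example #1 depends on a surrounding predictor class (self.predict), so it cannot run on its own. A minimal standalone sketch of the same idea, keeping only the series whose integrated autocorrelation time estimate converges and averaging their autocorrelation functions, might look as follows; the mean_autocorrelation helper and the synthetic AR(1) chains are illustrative assumptions, not part of the original class:

import math

import numpy as np
from emcee.autocorr import function_1d, integrated_time


def mean_autocorrelation(series, n_max, tol=5):
    """Average function_1d over the series whose IAT estimate converged (illustrative helper)."""
    acc, count = 0, 0
    for s in series:
        tau = integrated_time(s, tol=tol, quiet=True)  # warns instead of raising
        if not math.isnan(float(tau[0])):
            acc = acc + function_1d(s)
            count += 1
    if count == 0:
        raise ValueError("no series passed the convergence check")
    return (acc / count)[:n_max]


# Toy data: a few AR(1) chains with known autocorrelation (illustrative only).
rng = np.random.default_rng(0)
chains = []
for _ in range(4):
    x = np.empty(5000)
    x[0] = rng.normal()
    for t in range(1, 5000):
        x[t] = 0.9 * x[t - 1] + rng.normal()
    chains.append(x)

print(mean_autocorrelation(chains, n_max=50)[:5])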
Example #2
def autocorrelation(mcmc_fit_instance,
                    correlations_to_plot=None,
                    flat_chain=None,
                    variable_labels=None):
    """
    Plots correlation function of defined parameters.

    :param mcmc_fit_instance: Union[elisa.analytics.binary_fit.lc_fit.LCFit, elisa.analytics.binary_fit.rv_fit.RVFit];
    :param correlations_to_plot: List; names of variables whose autocorrelation functions will be displayed
    :param flat_chain: numpy.array; flattened chain of all parameters
    :param variable_labels: List; list of variable names used during the MCMC run, needed to
                                  identify columns in `flat_chain`
    """
    autocorr_plot_kwargs = dict()

    flat_chain = deepcopy(mcmc_fit_instance.flat_chain) \
        if flat_chain is None else deepcopy(flat_chain)
    variable_labels = mcmc_fit_instance.variable_labels if variable_labels is None else variable_labels
    correlations_to_plot = variable_labels if correlations_to_plot is None else correlations_to_plot

    if flat_chain is None:
        raise ValueError('The autocorrelation plot is only available for the mcmc method, '
                         'or for some reason the flat chain was not found.')

    labels = serialize_plot_labels(variable_labels)

    autocorr_fns = np.empty((flat_chain.shape[0], len(variable_labels)))
    autocorr_time = np.empty(len(variable_labels))

    for i, lbl in enumerate(variable_labels):
        autocorr_fns[:, i] = function_1d(flat_chain[:, i])
        # integrated_time returns a length-1 array for 1-D input
        autocorr_time[i] = integrated_time(flat_chain[:, i], quiet=True)[0]

    autocorr_plot_kwargs.update({
        'correlations_to_plot': correlations_to_plot,
        'autocorr_fns': autocorr_fns,
        'autocorr_time': autocorr_time,
        'variable_labels': variable_labels,
        'labels': labels
    })

    MCMCPlot.autocorr(**autocorr_plot_kwargs)
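The plotting call at the end goes through elisa's MCMCPlot backend, but the numerical part is plain emcee. A minimal sketch of the same per-parameter computation on a made-up flat chain (the chain, smoothing kernel, and variable labels below are placeholders, not elisa API):

import numpy as np
from emcee.autocorr import function_1d, integrated_time

rng = np.random.default_rng(1)
# Fake flattened chain: three parameters made serially correlated by a moving average.
raw = rng.normal(size=(4200, 3))
kernel = np.ones(20) / 20
flat_chain = np.array([np.convolve(raw[:, i], kernel, mode="valid") for i in range(3)]).T
variable_labels = ["p0", "p1", "p2"]

autocorr_fns = np.empty((flat_chain.shape[0], len(variable_labels)))
autocorr_time = np.empty(len(variable_labels))
for i, lbl in enumerate(variable_labels):
    autocorr_fns[:, i] = function_1d(flat_chain[:, i])
    autocorr_time[i] = integrated_time(flat_chain[:, i], quiet=True)[0]

for lbl, tau in zip(variable_labels, autocorr_time):
    print(lbl, round(float(tau), 1))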
Example #3
def convergenceVals(algor, ndim, varIdxs, chains_nruns, bi_steps):
    """
    Convergence statistics.
    """
    if algor == 'emcee':
        from emcee import autocorr

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        if algor == 'ptemcee':
            # Mean across chains; shape: (post burn-in steps, ndim)
            x = np.mean(chains_nruns.T, axis=1).T
            tau_autocorr = []
            j = 10  # Default in case the loop below is never entered
            for j in np.arange(50, x.shape[0], 50):
                # tau.shape: ndim
                tau = util.autocorr_integrated_time(x[:j])
                # Autocorrelation time. Mean across dimensions.
                tau_autocorr.append([bi_steps + j, np.mean(tau)])
            # Add one last point with the entire chain.
            if j < x.shape[0]:
                tau = util.autocorr_integrated_time(x)
                tau_autocorr.append([bi_steps + x.shape[0], np.mean(tau)])
            tau_autocorr = np.array(tau_autocorr).T
        elif algor == 'emcee':
            tau_autocorr = None

        # Autocorrelation time for each parameter, mean across chains.
        if algor == 'emcee':
            acorr_t = autocorr.integrated_time(chains_nruns, tol=0, quiet=True)
        elif algor == 'ptemcee':
            x = np.mean(chains_nruns.transpose(1, 0, 2), axis=0)
            acorr_t = util.autocorr_integrated_time(x)

        # Autocorrelation time for each chain for each parameter.
        logger = logging.getLogger()
        logger.disabled = True
        at = []
        # For each parameter/dimension
        for p in chains_nruns.T:
            at_p = []
            # For each chain for this parameter/dimension
            for c in p:
                if algor == 'emcee':
                    at_p.append(autocorr.integrated_time(c, quiet=True)[0])
                elif algor == 'ptemcee':
                    at_p.append(util.autocorr_integrated_time(c))
            at.append(at_p)
        logger.disabled = False
        # IAT for all chains and all parameters.
        all_taus = [item for subl in at for item in subl]

        # # Worst chain: chain with the largest acorr time.
        # max_at_c = [np.argmax(a) for a in at]
        # # Best chain: chain with the smallest acorr time.
        # min_at_c = [np.argmin(a) for a in at]
        # Chain with the closest IAT to the median
        med_at_c = [np.argmin(np.abs(np.median(a) - a)) for a in at]

        # Mean Geweke z-scores and autocorrelation functions for all chains.
        geweke_z, acorr_function = [[] for _ in range(ndim)],\
            [[] for _ in range(ndim)]
        for i, p in enumerate(chains_nruns.T):
            for c in p:
                try:
                    geweke_z[i].append(geweke(c))
                except ZeroDivisionError:
                    geweke_z[i].append([np.nan, np.nan])
                try:
                    if algor == 'emcee':
                        acorr_function[i].append(autocorr.function_1d(c))
                    elif algor == 'ptemcee':
                        acorr_function[i].append(util.autocorr_function(c))
                except FloatingPointError:
                    acorr_function[i].append([np.nan])
        # Mean across chains
        geweke_z = np.nanmean(geweke_z, axis=1)
        acorr_function = np.nanmean(acorr_function, axis=1)

        # # Cut the autocorrelation function just after *all* the parameters
        # # have crossed the zero line.
        # try:
        #     lag_zero = max([np.where(_ < 0)[0][0] for _ in acorr_function])
        # except IndexError:
        #     # Could not obtain zero lag
        #     lag_zero = acorr_function.shape[-1]
        # acorr_function = acorr_function[:, :int(lag_zero + .2 * lag_zero)]

        # # Approx IAT
        # lag_iat = 1. + 2. * np.sum(acorr_function, axis=1)
        # print("  Approx (zero lag) IAT: ", lag_iat)

        # Effective Sample Size (per param) = (nsteps / tau) * nchains
        mcmc_ess = (chains_nruns.shape[0] / acorr_t) * chains_nruns.shape[1]

        # TODO fix this function
        # # Minimum effective sample size (ESS), and multi-variable ESS.
        # minESS, mESS = fminESS(ndim), multiESS(chains_nruns)
        # # print("mESS: {}".format(mESS))
        # mESS_epsilon = [[], [], []]
        # for alpha in [.01, .05, .1, .2, .3, .4, .5, .6, .7, .8, .9, .95]:
        #     mESS_epsilon[0].append(alpha)
        #     mESS_epsilon[1].append(fminESS(ndim, alpha=alpha, ess=minESS))
        #     mESS_epsilon[2].append(fminESS(ndim, alpha=alpha, ess=mESS))

    return tau_autocorr, acorr_t, med_at_c, all_taus, geweke_z,\
        acorr_function, mcmc_ess
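For the emcee branch, the effective sample size at the end follows directly from the integrated autocorrelation time: ESS = (nsteps / tau) * nchains. A small self-contained sketch with a synthetic chain in emcee's (nsteps, nwalkers, ndim) layout (shapes and the AR(1) coefficient are illustrative):

import numpy as np
from emcee import autocorr

rng = np.random.default_rng(2)
nsteps, nwalkers, ndim = 2000, 20, 2

# Synthetic chain in (nsteps, nwalkers, ndim) layout, built from an AR(1) recursion.
chains_nruns = np.empty((nsteps, nwalkers, ndim))
chains_nruns[0] = rng.normal(size=(nwalkers, ndim))
for t in range(1, nsteps):
    chains_nruns[t] = 0.8 * chains_nruns[t - 1] + rng.normal(size=(nwalkers, ndim))

acorr_t = autocorr.integrated_time(chains_nruns, tol=0, quiet=True)  # shape (ndim,)
mcmc_ess = (chains_nruns.shape[0] / acorr_t) * chains_nruns.shape[1]

print(acorr_t)   # each entry should be near (1 + 0.8) / (1 - 0.8) = 9
print(mcmc_ess)  # roughly (2000 / 9) * 20, i.e. about 4400 effective samples per parameter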
Example #4
    def integrated_time(self, x, low=10, high=None, step=1, c=5,
                        full_output=False, axis=0, fast=False):
        """Estimate the integrated autocorrelation time of a time series.

        This estimate uses the iterative procedure described on page 16 of
        `Sokal's notes <http://www.stat.unc.edu/faculty/cji/Sokal.pdf>`_ to
        determine a reasonable window size.

        Args:
            x: The time series. If multidimensional, set the time axis using
                the ``axis`` keyword argument and the function will be
                computed for every other axis.
            low (Optional[int]): The minimum window size to test. (default:
                ``10``)
            high (Optional[int]): The maximum window size to test. (default:
                ``x.shape[axis] / (2*c)``)
            step (Optional[int]): The step size for the window search.
                (default: ``1``)
            c (Optional[float]): The minimum number of autocorrelation times
                needed to trust the estimate. (default: ``5``)
            full_output (Optional[bool]): Return the final window size as well
                as the autocorrelation time. (default: ``False``)
            axis (Optional[int]): The time axis of ``x``. Assumed to be the
                first axis if not specified.
            fast (Optional[bool]): If ``True``, only use the first ``2^n`` (for
                the largest power) entries for efficiency. (default: ``False``)

        Returns:
            float or array: An estimate of the integrated autocorrelation time
                of the time series ``x`` computed along the axis ``axis``.
            Optional[int]: The final window size that was used. Only returned
                if ``full_output`` is ``True``.

        Raises:
            AutocorrError: If the autocorrelation time can't be reliably
                estimated from the chain. This normally means that the chain
                is too short.

        """
        size = 0.5 * x.shape[axis]
        if int(c * low) >= size:
            raise AutocorrError("The chain is too short")

        # Compute the autocorrelation function.
        f = function_1d(x)

        # Check the dimensions of the array.
        oned = len(f.shape) == 1
        m = [slice(None), ] * len(f.shape)

        # Loop over proposed window sizes until convergence is reached.
        if high is None:
            high = int(size / c)
        for M in np.arange(low, high, step).astype(int):
            # Compute the autocorrelation time with the given window.
            if oned:
                # Special case 1D for simplicity.
                tau = 1 + 2 * np.sum(f[1:M])
            else:
                # N-dimensional case.
                m[axis] = slice(1, M)
                tau = 1 + 2 * np.sum(f[m], axis=axis)

            # Accept the window size if it satisfies the convergence criterion.
            if np.all(tau > 1.0) and M > c * tau.max():
                if full_output:
                    return tau, M
                return tau

            # If the autocorrelation time is too long to be estimated reliably
            # from the chain, it should fail.
            if c * tau.max() >= size:
                break

        raise AutocorrError("The chain is too short to reliably estimate "
                            "the autocorrelation time")