Example #1
0
    def __init__(self, samples, engine):
        """
        Set up a mixture of sampled states for free-energy analysis.

        Parameters
        ----------
            samples : list(sample)
                A list of samples, one per sampled state.
            engine : object
                The analysis engine used to process the mixture; its class
                name is reported in verbose mode and its ``__initialize__``
                hook is invoked at the end of setup.

        Raises
        ------
            InputError
                If `samples` is an empty list.

        """
        self.samples = samples
        self.engine = engine
        m = self.m = len(samples)

        # Fail fast before any reporting or processing if there is no data.
        if m == 0:
            raise InputError("list of samples is empty")

        if mics.verbose:
            info("\n=== Setting up mixture ===")
            info("Analysis method: ", self.engine.__class__.__name__)
            info("Number of samples:", m)

        self.n = np.array([len(sample.dataset) for sample in samples])
        self.neff = np.array([sample.neff for sample in samples])
        names = self.names = list(samples[0].dataset.columns)
        if mics.verbose:
            info("Sample sizes:", self.n)
            info("Effective sample sizes:", self.neff)
            info("Properties:", ", ".join(names))

        # Evaluate every state's reduced potential on every sample's dataset.
        potentials = [sample.potential.lambdify() for sample in samples]
        self.u = [multimap(potentials, sample.dataset) for sample in samples]
        # Bennett's method provides the initial free-energy guess.
        self.f = bennett(self.u)
        if mics.verbose:
            info("Initial free-energy guess:", self.f)
        self.engine.__initialize__(self)
Example #2
0
 def histograms(self, property='potential', bins=100, **constants):
     """
     Tabulate histograms of a property evaluated over every sample,
     all sharing a common set of `bins` spanning the global data range.
     """
     if property == 'potential':
         # Each sample's own reduced potential evaluated on its own data.
         values = [multimap([s.potential.lambdify()], s.dataset) for s in self]
     else:
         columns = list(self[0].dataset.columns)
         funcs = [func(property, columns, constants).lambdify()]
         values = [multimap(funcs, s.dataset) for s in self]
     lower = min(np.amin(v) for v in values)
     upper = max(np.amax(v) for v in values)
     width = (upper - lower)/bins
     centers = [lower + width*(k + 0.5) for k in range(bins)]
     frame = pd.DataFrame({property: centers})
     for index, sample_values in enumerate(values):
         counts = np.histogram(sample_values, bins, (lower, upper))[0]
         frame["state %s" % (index+1)] = counts
     return frame
Example #3
0
 def __compute__(self, functions, constants):
     """
     Evaluate one expression (str) or several expressions (iterable of str)
     on every sample's dataset, returning None when evaluation fails.
     """
     try:
         expressions = [functions] if isinstance(functions, str) else functions
         lambdas = [
             func(expression, self.names, constants).lambdify()
             for expression in expressions
         ]
         return [multimap(lambdas, sample.dataset) for sample in self.samples]
     except (InputError, KeyError):
         # Best-effort evaluation: signal failure instead of raising.
         return None
Example #4
0
File: samples.py — Project: craabreu/mics
    def averaging(self, properties, combinations=None, **constants):
        """
        Computes averages and uncertainties of configurational properties. In
        addition, computes combinations among these averages while automatically
        handling uncertainty propagation.

        Parameters
        ----------
            properties : dict(str: str)
                A dictionary associating names to mathematical expressions. This
                is used to define functions of the collective variables included
                in the samples. Then, averages of these functions will be
                evaluated at all sampled states, along with their uncertainties.
                The expressions might also depend on parameters passed as
                keyword arguments (see below).
            combinations : dict(str: str), optional, default=None
                A dictionary associating names to mathematical expressions. This
                is used to define functions of the names passed as keys in the
                `properties` dictionary. The expressions might also depend on
                parameters passed as keyword arguments (see below). If None or
                empty, no combinations are computed.
            **constants : optional keyword arguments
                A set of arguments passed as ``name=value``, used to define
                parameter values for evaluating the mathematical expressions
                in both `properties` and `combinations`.

        Returns
        -------
            pandas.DataFrame
                A data frame containing the computed averages and combinations,
                as well as their estimated standard errors.

        """
        # The mutable default `combinations={}` was replaced by the
        # None-sentinel idiom; callers are unaffected.
        if combinations is None:
            combinations = {}
        variables = self.dataset.columns.tolist()
        functions = [
            func(f, variables, constants).lambdify()
            for f in properties.values()
        ]
        y = multimap(functions, self.dataset)
        ym = np.mean(y, axis=1)
        # Covariance of the sample means, estimated with batch size self.b.
        Theta = covariance(y, ym, self.b)
        result = propertyDict(properties.keys(), ym, stdError(Theta))
        if combinations:
            # The delta method propagates uncertainties from the property
            # averages through the combination expressions.
            delta = deltaMethod(combinations.values(), properties.keys(),
                                constants)
            (h, dh) = delta.evaluate(ym, Theta)
            result.update(propertyDict(combinations.keys(), h, dh))
        return result.to_frame(0)
Example #5
0
File: samples.py — Project: craabreu/mics
    def subsampling(self, integratedACF=True):
        """
        Performs inline subsampling based on the statistical inefficiency ``g``
        of the specified attribute `acfun` of :class:`sample`, aiming at
        obtaining a sample of :term:`IID` configurations. Subsampling is done
        via jumps of varying sizes around ``g``, so that the sample size decays
        by a factor of approximately ``1/g``.

        Parameters
        ----------
            integratedACF : bool, optional, default=True
                If true, the integrated :term:`ACF` method :cite:`Chodera_2007`
                will be used for computing the statistical inefficiency.
                Otherwise, the :term:`OBM` method will be used instead.

        Returns
        -------
            :class:`sample`
                Although the subsampling is done inline, the new sample is
                returned for chaining purposes.

        """
        size = len(self.dataset)
        if mics.verbose:
            method = "integrated ACF" if integratedACF else "OBM"
            info("\n=== Subsampling via %s ===" % method)
            info("Original sample size:", size)
        if integratedACF:
            series = multimap([self.acfun.lambdify()], self.dataset)
            g = timeseries.statisticalInefficiency(series[0])
        else:
            # OBM estimate: ratio of actual to effective sample size.
            g = size / self.neff
        kept = timeseries.subsampleCorrelatedData(self.dataset.index, g)
        self.dataset = self.dataset.reindex(kept)
        self.neff = len(kept)
        if mics.verbose:
            info("Statistical inefficiency:", g)
            info("New sample size:", self.neff)
        return self
Example #6
0
File: samples.py — Project: craabreu/mics
    def __init__(self,
                 dataset,
                 potential,
                 acfun=None,
                 batchsize=None,
                 **constants):
        """
        Register a sample of configurations, estimating its effective sample
        size from the variance ratio between uncorrelated-data and
        Overlapping-Batch-Means estimates of the `acfun` time series.
        """
        names = dataset.columns.tolist()
        size = len(dataset)
        # Default batch size: the square root of the sample size.
        if batchsize:
            b = self.b = batchsize
        else:
            b = self.b = int(np.sqrt(size))

        if mics.verbose:
            info("\n=== Setting up new sample ===")
            info("Properties:", ", ".join(names))
            info("Constants:", constants)
            info("Reduced potential function:", potential)
            info("Autocorrelation analysis function:",
                 acfun if acfun else potential)
            info("Sample size:", size)
            info("Batch size:", b)

        self.dataset = dataset
        self.potential = func(potential, names, constants)
        # The potential itself drives the autocorrelation analysis unless an
        # explicit acfun expression is supplied.
        if acfun is None:
            self.acfun = self.potential
        else:
            self.acfun = func(acfun, names, constants)
        series = multimap([self.acfun.lambdify()], dataset)
        mean = np.mean(series, axis=1)
        S1 = covariance(series, mean, 1).item(0)
        Sb = covariance(series, mean, b).item(0)
        if not (np.isfinite(S1) and np.isfinite(Sb)):
            raise FloatingPointError(
                "unable to determine effective sample size")
        self.neff = size * S1 / Sb

        if mics.verbose:
            info("Variance disregarding autocorrelation:", S1)
            info("Variance via Overlapping Batch Means:", Sb)
            info("Effective sample size:", self.neff)