Example #1
    def __init__(self, means, covs, weights=None, lims=None, names=None, label='', labels=None):
        """
        :param means: list of means for each Gaussian in the mixture
        :param covs: list of covariances for the Gaussians in the mixture
        :param weights: optional weight for each component (defaults to equal weight)
        :param lims: optional list of hard limits for each parameter, [[x1min,x1max], [x2min,x2max]]; use None for no limit
        :param names: list of names (strings) for each parameter. If not set, set to "param1", "param2"...
        :param label: name for labelling this mixture
        :param labels: list of latex labels for each parameter. If not set, defaults to p_{1}, p_{2}...
        """

        self.means = np.asarray(means)
        self.dim = self.means.shape[1]
        self.covs = [np.array(cov) for cov in covs]
        self.invcovs = [np.linalg.inv(cov) for cov in self.covs]
        if weights is None: weights = [1. / len(means)] * len(means)
        self.weights = np.array(weights, dtype=np.float64)
        if np.sum(self.weights) <= 0:
            raise ValueError('Weight <= 0 in MixtureND')
        self.weights /= np.sum(weights)
        self.norms = (2 * np.pi) ** (0.5 * self.dim) * np.array([np.sqrt(np.linalg.det(cov)) for cov in self.covs])
        self.lims = lims
        self.paramNames = ParamNames(names=names, default=self.dim, labels=labels)
        self.names = self.paramNames.list()
        self.label = label
        self.total_mean = np.atleast_1d(np.dot(self.weights, self.means))
        self.total_cov = np.zeros((self.dim, self.dim))
        for mean, cov, weight in zip(self.means, self.covs, self.weights):
            self.total_cov += weight * (cov + np.outer(mean - self.total_mean, mean - self.total_mean))
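
A minimal usage sketch of this constructor, assuming the GetDist import path getdist.gaussian_mixtures (the parameter values are illustrative):

import numpy as np
from getdist.gaussian_mixtures import MixtureND  # assumed import path

# Two-component mixture over two named parameters; 'y' has a hard lower bound at 0.
mixture = MixtureND(means=[[0.0, 0.0], [1.0, 2.0]],
                    covs=[np.diag([0.5, 0.2]), [[0.3, 0.1], [0.1, 0.4]]],
                    weights=[0.7, 0.3],
                    lims=[[None, None], [0, None]],
                    names=['x', 'y'])
print(mixture.total_mean)  # weighted mean of the component means
print(mixture.total_cov)   # within- plus between-component covariance
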
Example #2
    def loadChains(self, root, files, ignore_lines=None):
        """
        Loads chains from files.

        :param root: Root name
        :param files: list of file names
        :param ignore_lines: Number of lines at the start of each file to ignore; use None to not skip any
        :return: True if loaded successfully, False if none loaded
        """
        self.chains = []
        self.samples = None
        self.weights = None
        self.loglikes = None
        self.name_tag = self.name_tag or os.path.basename(root)
        for fname in files:
            if print_load_details: print(fname)
            self.chains.append(
                WeightedSamples(fname,
                                ignore_lines or self.ignore_lines,
                                min_weight_ratio=self.min_weight_ratio))
        if len(self.chains) == 0:
            raise WeightedSampleError('loadChains - no chains found for ' +
                                      root)
        if self.paramNames is None:
            self.paramNames = ParamNames(default=self.chains[0].n)
        self._weightsChanged()
        return len(self.chains) > 0
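
A hedged sketch of calling this method; `chain_set` stands for an already-constructed Chains-like instance, and the root and file names are made up:

root = 'chains/run1'                        # hypothetical root name
files = [root + '_1.txt', root + '_2.txt']  # hypothetical chain files
# Each file becomes a WeightedSamples entry in chain_set.chains; parameter names
# default to param1, param2, ... if none were set beforehand.
ok = chain_set.loadChains(root, files, ignore_lines=30)  # skip 30 burn-in rows per file
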
Example #3
    def loadChains(self, root, files_or_samples, weights=None, loglikes=None,
                   ignore_lines=None):
        """
        Loads chains from files.

        :param root: Root name
        :param files_or_samples: list of file names or list of arrays of samples, or single array of samples
        :param weights: if loading from arrays of samples, corresponding list of arrays of weights
        :param loglikes: if loading from arrays of samples, corresponding list of arrays of -2 log(likelihood)
        :param ignore_lines: Number of lines at the start of each file to ignore; use None to not skip any
        :return: True if loaded successfully, False if none loaded
        """
        self.chains = []
        self.samples = None
        self.weights = None
        self.loglikes = None
        if ignore_lines is None: ignore_lines = self.ignore_lines
        WSkwargs = {"ignore_rows": ignore_lines,
                    "min_weight_ratio": self.min_weight_ratio}
        if isinstance(files_or_samples, six.string_types) or isinstance(files_or_samples[0], six.string_types):
            # From files
            if weights is not None or loglikes is not None:
                raise ValueError('weights and loglikes not needed reading from file')
            if isinstance(files_or_samples, six.string_types): files_or_samples = [files_or_samples]
            self.name_tag = self.name_tag or os.path.basename(root)
            for fname in files_or_samples:
                if print_load_details: print(fname)
                self.chains.append(WeightedSamples(fname, **WSkwargs))
            nchains = len(self.chains)
            if not nchains:
                raise WeightedSampleError('loadChains - no chains found for ' + root)
        else:
            # From arrays
            dim = array_dimension(files_or_samples)
            if dim in [1, 2]:
                self.setSamples(slice_or_none(files_or_samples, ignore_lines),
                                slice_or_none(weights, ignore_lines),
                                slice_or_none(loglikes, ignore_lines), self.min_weight_ratio)
                if self.paramNames is None:
                    self.paramNames = ParamNames(default=self.n)
                nchains = 1
            elif dim == 3:
                for i, samples_i in enumerate(files_or_samples):
                    self.chains.append(WeightedSamples(
                        samples=samples_i, loglikes=None if loglikes is None else np.atleast_2d(loglikes)[i],
                        weights=None if weights is None else np.atleast_2d(weights)[i], **WSkwargs))
                if self.paramNames is None:
                    self.paramNames = ParamNames(default=self.chains[0].n)
                nchains = len(self.chains)
            else:
                raise ValueError('samples or files must be array of samples, or a list of arrays or files')
        self._weightsChanged()
        return nchains > 0
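
Because this variant also accepts arrays, here is a hedged sketch of loading from in-memory samples instead of files (the arrays are made up; `chain_set` is a hypothetical instance):

import numpy as np

samples = [np.random.randn(1000, 3), np.random.randn(1000, 3)]  # two chains, 3 parameters
weights = [np.ones(1000), np.ones(1000)]
# A list of 2D arrays takes the dim == 3 branch above: one WeightedSamples per chain.
chain_set.loadChains(root='', files_or_samples=samples, weights=weights)  # loglikes optional
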
Example #4
    def loadChains(self, root, files, ignore_lines=None):
        """
        Loads chains from files.

        :param root: Root name
        :param files: list of file names
        :param ignore_lines: Number of lines at the start of each file to ignore; use None to not skip any
        :return: True if loaded successfully, False if none loaded
        """
        self.chains = []
        self.samples = None
        self.weights = None
        self.loglikes = None
        self.name_tag = self.name_tag or os.path.basename(root)
        for fname in files:
            if print_load_details: print(fname)
            self.chains.append(WeightedSamples(fname, ignore_lines or self.ignore_lines))
        if len(self.chains) == 0:
            raise WeightedSampleError('loadChains - no chains found for ' + root)
        if self.paramNames is None:
            self.paramNames = ParamNames(default=self.chains[0].n)
        self._weightsChanged()
        return len(self.chains) > 0
Example #5
    def setParamNames(self, names=None):
        """
        Sets the names of the params.

        :param names: Either a :class:`~.paramnames.ParamNames` object, the name of a .paramnames file to load, a list of name strings,
                      otherwise use default names (param1, param2...).
        """
        self.paramNames = None
        if isinstance(names, ParamNames):
            self.paramNames = names
        elif isinstance(names, six.string_types):
            self.paramNames = ParamNames(names)
        elif names is not None:
            self.paramNames = ParamNames(names=names)
        elif self.samples is not None:
            self.paramNames = ParamNames(default=self.n)
        if self.paramNames:
            self._getParamIndices()
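
A short sketch of the accepted inputs (the file name and parameter names are illustrative; `chain_set` is a hypothetical instance):

chain_set.setParamNames(ParamNames('my_run.paramnames'))  # an existing ParamNames object
chain_set.setParamNames('my_run.paramnames')              # path to a .paramnames file
chain_set.setParamNames(['omegam', 'sigma8', 'H0'])       # explicit list of name strings
chain_set.setParamNames()                                 # default param1, param2, ... sized from self.samples
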
Example #6
class MixtureND(object):
    """
    Gaussian mixture model with optional boundary ranges. Includes functions for generating samples and projecting.
    """
    def __init__(self,
                 means,
                 covs,
                 weights=None,
                 lims=None,
                 names=None,
                 label='',
                 labels=None):
        """
        :param means: list of means for each Gaussian in the mixture
        :param covs: list of covariances for the Gaussians in the mixture
        :param weights: optional weight for each component (defaults to equal weight)
        :param lims: optional list of hard limits for each parameter, [[x1min,x1max], [x2min,x2max]]; use None for no limit
        :param names: list of names (strings) for each parameter. If not set, set to "param1", "param2"...
        :param label: name for labelling this mixture
        :param labels: list of latex labels for each parameter. If not set, defaults to p_{1}, p_{2}...
        """

        self.means = np.asarray(means)
        self.dim = self.means.shape[1]
        self.covs = [np.array(cov) for cov in covs]
        self.invcovs = [np.linalg.inv(cov) for cov in self.covs]
        if weights is None: weights = [1. / len(means)] * len(means)
        self.weights = np.array(weights, dtype=np.float64)
        if np.sum(self.weights) <= 0:
            raise ValueError('Weight <= 0 in MixtureND')
        self.weights /= np.sum(weights)
        self.norms = (2 * np.pi)**(0.5 * self.dim) * np.array(
            [np.sqrt(np.linalg.det(cov)) for cov in self.covs])
        self.lims = lims
        self.paramNames = ParamNames(names=names,
                                     default=self.dim,
                                     labels=labels)
        self.names = self.paramNames.list()
        self.label = label
        self.total_mean = np.atleast_1d(np.dot(self.weights, self.means))
        self.total_cov = np.zeros((self.dim, self.dim))
        for mean, cov, weight in zip(self.means, self.covs, self.weights):
            self.total_cov += weight * (
                cov + np.outer(mean - self.total_mean, mean - self.total_mean))

    def sim(self, size):
        """
        Generate an array of independent samples

        :param size: number of samples
        :return: 2D array of sample values
        """
        tot = 0
        res = []
        block = None
        while True:
            for num, mean, cov in zip(
                    np.random.multinomial(block or size, self.weights),
                    self.means, self.covs):
                if num > 0:
                    v = np.random.multivariate_normal(mean, cov, size=num)
                    if self.lims is not None:
                        for i, (mn, mx) in enumerate(self.lims):
                            if mn is not None: v = v[v[:, i] >= mn]
                            if mx is not None: v = v[v[:, i] <= mx]
                    tot += v.shape[0]
                    res.append(v)
            if tot >= size:
                break
            if block is None:
                block = min(
                    max(size, 100000),
                    int(1.1 * (size * (size - tot))) // max(tot, 1) + 1)
        samples = np.vstack(res)
        if len(res) > 1: samples = np.random.permutation(samples)
        if tot != size:
            samples = samples[:-(tot - size), :]
        return samples

    def MCSamples(self, size, names=None, logLikes=False, **kwargs):
        """
        Gets a set of independent samples from the mixture as a  :class:`.mcsamples.MCSamples` object ready for plotting etc.

        :param size: number of samples
        :param names: set to override existing names
        :param logLikes: if True, set the sample likelihood values from the pdf; if False, don't store log likelihoods
        :return: a new :class:`.mcsamples.MCSamples` instance
        """
        samples = self.sim(size)
        if logLikes:
            loglikes = -np.log(self.pdf(samples))
        else:
            loglikes = None
        return MCSamples(samples=samples,
                         loglikes=loglikes,
                         paramNamesFile=copy.deepcopy(self.paramNames),
                         names=names,
                         ranges=self.lims,
                         **kwargs)

    def autoRanges(self, sigma_max=4, lims=None):
        res = []
        if lims is None: lims = self.lims
        if lims is None: lims = [(None, None) for _ in range(self.dim)]
        for i, (mn, mx) in enumerate(lims):
            covmin = None
            covmax = None
            if mn is None or mx is None:
                for mean, cov in zip(self.means, self.covs):
                    sigma = np.sqrt(cov[i, i])
                    xmin, xmax = mean[i] - sigma_max * sigma, mean[
                        i] + sigma_max * sigma
                    if mn is not None: xmax = max(xmax, mn + sigma_max * sigma)
                    if mx is not None: xmin = min(xmin, mx - sigma_max * sigma)
                    covmin = min(xmin, covmin) if covmin is not None else xmin
                    covmax = max(xmax, covmax) if covmax is not None else xmax
            res.append(
                (covmin if mn is None else mn, covmax if mx is None else mx))
        return res

    def pdf(self, x):
        """
        Calculate the PDF. Note this assumes x is within the boundaries (it does not return zero outside),
        and the result is only correctly normalized if there are no boundaries.

        :param x: array of parameter values to evaluate at
        :return: pdf at x
        """
        tot = None
        x = np.asarray(x)
        for i, (mean, icov, weight, norm) in enumerate(
                zip(self.means, self.invcovs, self.weights, self.norms)):
            dx = x - mean
            if len(x.shape) == 1:
                res = np.exp(-icov.dot(dx).dot(dx) / 2) / norm
            else:
                res = np.exp(
                    -np.einsum('ik,km,im->i', dx, icov, dx) / 2) / norm
            if not i:
                tot = res * weight
            else:
                tot += res * weight
        return tot

    def pdf_marged(self, index, x, no_limit_marge=False):
        """
        Calculate the 1D marginalized PDF. Only valid if the other (marginalized) parameters have no hard limits.

        :param index: index or name of parameter
        :param x: value to evaluate PDF at
        :param no_limit_marge: if true don't raise an error if mixture has limits
        :return: marginalized 1D pdf at x
        """
        if isinstance(index, six.string_types): index = self.names.index(index)
        if not no_limit_marge: self.checkNoLimits([index])
        tot = None
        for i, (mean, cov,
                weight) in enumerate(zip(self.means, self.covs, self.weights)):
            dx = x - mean[index]
            var = cov[index, index]
            res = np.exp(-dx**2 / var / 2) / np.sqrt(2 * np.pi * var)
            if not i:
                tot = res * weight
            else:
                tot += res * weight
        return tot

    def density1D(self,
                  index=0,
                  num_points=1024,
                  sigma_max=4,
                  no_limit_marge=False):
        """
        Get 1D marginalized density. Only works if no hard limits in other parameters.

        :param index: parameter name or index
        :param num_points: number of grid points to evaluate PDF
        :param sigma_max: maximum number of standard deviations away from means to include in computed range
        :param no_limit_marge: if true don't raise error if limits on other parameters
        :return: :class:`~.densities.Density1D` instance
        """
        if isinstance(index, six.string_types):
            index = self.names.index(index)
        if not no_limit_marge:
            self.checkNoLimits([index])
        mn, mx = self.autoRanges(sigma_max)[index]
        x = np.linspace(mn, mx, num_points)
        like = self.pdf_marged(index, x)
        return Density1D(x, like)

    def density2D(self,
                  params=None,
                  num_points=1024,
                  xmin=None,
                  xmax=None,
                  ymin=None,
                  ymax=None,
                  sigma_max=5):
        """
        Get 2D marginalized density for a pair of parameters.

        :param params: list of two parameter names or indices to use. If already 2D, can be None.
        :param num_points: number of grid points for evaluation
        :param xmin: optional lower value for first parameter
        :param xmax: optional upper value for first parameter
        :param ymin: optional lower value for second parameter
        :param ymax: optional upper value for second parameter
        :param sigma_max: maximum number of standard deviations away from mean to include in calculated range
        :return: :class:`~.densities.Density2D` instance
        """
        if self.dim > 2 or params is not None or not isinstance(
                self, Mixture2D):
            mixture = self.marginalizedMixture(params=params)
        elif self.dim != 2:
            raise Exception('density2D requires at least two dimensions')
        else:
            mixture = self

        return mixture._density2D(num_points=num_points,
                                  xmin=xmin,
                                  xmax=xmax,
                                  ymin=ymin,
                                  ymax=ymax,
                                  sigma_max=sigma_max)

    def _params_to_indices(self, params):
        indices = []
        if params is None:
            params = self.names
        for p in params:
            if isinstance(p, six.string_types):
                indices.append(self.names.index(p))
            elif hasattr(p, 'name'):
                indices.append(self.names.index(p.name))
            else:
                indices.append(p)
        return indices

    def marginalizedMixture(self, params, label=None, no_limit_marge=False):
        """
        Calculates a reduced mixture model by marginalization over unwanted parameters

        :param params: array of parameter names or indices to retain. If None, will simply return a copy of this mixture.
        :param label: optional label for the marginalized mixture
        :param no_limit_marge: if true don't raise an error if mixture has limits.
        :return: a new marginalized  :class:`MixtureND` instance
        """

        indices = self._params_to_indices(params)
        if not no_limit_marge:
            self.checkNoLimits(indices)
        indices = np.array(indices)
        if self.names is not None:
            names = [self.names[i] for i in indices]
        else:
            names = None
        if self.lims is not None:
            lims = [self.lims[i] for i in indices]
        else:
            lims = None
        if label is None:
            label = self.label
        covs = [cov[np.ix_(indices, indices)] for cov in self.covs]
        means = [mean[indices] for mean in self.means]
        if len(indices) == 2:
            tp = Mixture2D
        else:
            tp = MixtureND
        mixture = tp(means,
                     covs,
                     self.weights,
                     lims=lims,
                     names=names,
                     label=label)
        mixture.paramNames.setLabelsAndDerivedFromParamNames(self.paramNames)
        return mixture

    def conditionalMixture(self, fixed_params, fixed_param_values, label=None):
        """
        Returns a reduced conditional mixture model for the distribution when certain parameters are fixed.

        :param fixed_params: list of names or numbers of parameters to fix
        :param fixed_param_values:  list of values for the fixed parameters
        :param label: optional label for the new mixture
        :return: A new :class:`MixtureND` instance with cov_i = Projection(Cov_i^{-1})^{-1} and shifted conditional means
        """

        fixed_params = self._params_to_indices(fixed_params)
        self.checkNoLimits(fixed_params)
        keep_params = [i for i in range(self.dim) if not i in fixed_params]
        if not len(keep_params):
            raise ValueError(
                'conditionalMixture must leave at least one non-fixed parameter'
            )
        new_means = []
        new_covs = []
        new_weights = []
        for mean, cov, invcov, weight in zip(self.means, self.covs,
                                             self.invcovs, self.weights):
            deltas = np.asarray(fixed_param_values) - mean[fixed_params]
            new_cov = np.linalg.inv(invcov[np.ix_(keep_params, keep_params)])
            new_mean = mean[keep_params] - new_cov.dot(invcov[np.ix_(
                keep_params, fixed_params)].dot(deltas))
            if len(self.weights) == 1 and False:
                logw = 0
            else:
                logw = invcov[np.ix_(fixed_params, fixed_params)].dot(deltas).dot(deltas) \
                       + np.log(np.linalg.det(cov[np.ix_(fixed_params, fixed_params)]
                                              - cov[np.ix_(fixed_params, keep_params)].dot(
                    np.linalg.inv(cov[np.ix_(keep_params, keep_params)]).dot(
                        cov[np.ix_(keep_params, fixed_params)]))))
            new_weights.append(logw)
            new_means.append(new_mean)
            new_covs.append(new_cov)

        new_weights = np.exp(-(np.asarray(new_weights) - min(new_weights)) / 2)
        if self.names is not None:
            names = [self.names[i] for i in keep_params]
        else:
            names = None
        mixture = MixtureND(new_means,
                            new_covs,
                            new_weights,
                            names=names,
                            label=label)
        mixture.paramNames.setLabelsAndDerivedFromParamNames(self.paramNames)
        return mixture

    def checkNoLimits(self, keep_params):
        if self.lims is None:
            return
        for i, lim in enumerate(self.lims):
            if i not in keep_params and (lim[0] is not None
                                         or lim[1] is not None):
                raise Exception(
                    'In general can only marginalize analytically if no hard boundary limits: '
                    + self.label)

    def getUpper(self, name):
        if self.lims is None: return None
        return self.lims[self.names.index(name)][1]

    def getLower(self, name):
        if self.lims is None:
            return None
        return self.lims[self.names.index(name)][0]
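
A hedged end-to-end sketch using the class above: draw samples, evaluate the pdf, and marginalize (import path and parameter values are assumptions, as before):

import numpy as np
from getdist.gaussian_mixtures import MixtureND  # assumed import path

mix = MixtureND(means=[[0, 0, 0], [2, 1, 0]],
                covs=[np.eye(3), 0.5 * np.eye(3)],
                names=['x', 'y', 'z'])
draws = mix.sim(5000)                         # (5000, 3) array of independent samples
p = mix.pdf(draws[:10])                       # mixture pdf at the first ten samples
marged = mix.marginalizedMixture(['x', 'y'])  # Mixture2D over x and y
dens = mix.density1D('x')                     # Density1D grid for the x marginal
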
Example #7
class MixtureND(object):
    """
    Gaussian mixture model with optional boundary ranges. Includes functions for generating samples and projecting.
    """

    def __init__(self, means, covs, weights=None, lims=None, names=None, label='', labels=None):
        """
        :param means: list of means for each Gaussian in the mixture
        :param covs: list of covariances for the Gaussians in the mixture
        :param weights: optional weight for each component (defaults to equal weight)
        :param lims: optional list of hard limits for each parameter, [[x1min,x1max], [x2min,x2max]]; use None for no limit
        :param names: list of names (strings) for each parameter. If not set, set to "param1", "param2"...
        :param label: name for labelling this mixture
        :param labels: list of latex labels for each parameter. If not set, defaults to p_{1}, p_{2}...
        """

        self.means = np.asarray(means)
        self.dim = self.means.shape[1]
        self.covs = [np.array(cov) for cov in covs]
        self.invcovs = [np.linalg.inv(cov) for cov in self.covs]
        if weights is None: weights = [1. / len(means)] * len(means)
        self.weights = np.array(weights, dtype=np.float64)
        if np.sum(self.weights) <= 0:
            raise ValueError('Weight <= 0 in MixtureND')
        self.weights /= np.sum(weights)
        self.norms = (2 * np.pi) ** (0.5 * self.dim) * np.array([np.sqrt(np.linalg.det(cov)) for cov in self.covs])
        self.lims = lims
        self.paramNames = ParamNames(names=names, default=self.dim, labels=labels)
        self.names = self.paramNames.list()
        self.label = label
        self.total_mean = np.atleast_1d(np.dot(self.weights, self.means))
        self.total_cov = np.zeros((self.dim, self.dim))
        for mean, cov, weight in zip(self.means, self.covs, self.weights):
            self.total_cov += weight * (cov + np.outer(mean - self.total_mean, mean - self.total_mean))

    def sim(self, size):
        """
        Generate an array of independent samples

        :param size: number of samples
        :return: 2D array of sample values
        """
        tot = 0
        res = []
        block = None
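        # Accept/reject loop: draw per-component counts from a multinomial, sample each
        # Gaussian, drop points outside the hard limits, and repeat with a larger block
        # size until at least `size` accepted points have been collected.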
        while True:
            for num, mean, cov in zip(np.random.multinomial(block or size, self.weights), self.means, self.covs):
                if num > 0:
                    v = np.random.multivariate_normal(mean, cov, size=num)
                    if self.lims is not None:
                        for i, (mn, mx) in enumerate(self.lims):
                            if mn is not None: v = v[v[:, i] >= mn]
                            if mx is not None: v = v[v[:, i] <= mx]
                    tot += v.shape[0]
                    res.append(v)
            if tot >= size:
                break
            if block is None:
                block = min(max(size, 100000), int(1.1 * (size * (size - tot))) // max(tot, 1) + 1)
        samples = np.vstack(res)
        if len(res) > 1: samples = np.random.permutation(samples)
        if tot != size:
            samples = samples[:-(tot - size), :]
        return samples

    def MCSamples(self, size, names=None, logLikes=False, **kwargs):
        """
        Gets a set of independent samples from the mixture as a  :class:`.mcsamples.MCSamples` object ready for plotting etc.

        :param size: number of samples
        :param names: set to override existing names
        :param logLikes: if True, set the sample likelihood values from the pdf; if False, don't store log likelihoods
        :return: a new :class:`.mcsamples.MCSamples` instance
        """
        samples = self.sim(size)
        if logLikes:
            loglikes = -np.log(self.pdf(samples))
        else:
            loglikes = None
        return MCSamples(samples=samples, loglikes=loglikes, paramNamesFile=self.paramNames, names=names,
                         ranges=self.lims, **kwargs)

    def autoRanges(self, sigma_max=4, lims=None):
        res = []
        if lims is None: lims = self.lims
        if lims is None: lims = [(None, None) for _ in range(self.dim)]
        for i, (mn, mx) in enumerate(lims):
            covmin = None
            covmax = None
            if mn is None or mx is None:
                for mean, cov in zip(self.means, self.covs):
                    sigma = np.sqrt(cov[i, i])
                    xmin, xmax = mean[i] - sigma_max * sigma, mean[i] + sigma_max * sigma
                    if mn is not None: xmax = max(xmax, mn + sigma_max * sigma)
                    if mx is not None: xmin = min(xmin, mx - sigma_max * sigma)
                    covmin = min(xmin, covmin) if covmin is not None else xmin
                    covmax = max(xmax, covmax) if covmax is not None else xmax
            res.append((covmin if mn is None else mn, covmax if mx is None else mx))
        return res

    def pdf(self, x):
        """
        Calculate the PDF. Note this assumes x is within the boundaries (it does not return zero outside),
        and the result is only correctly normalized if there are no boundaries.

        :param x: array of parameter values to evaluate at
        :return: pdf at x
        """
        tot = None
        x = np.asarray(x)
        for i, (mean, icov, weight, norm) in enumerate(zip(self.means, self.invcovs, self.weights, self.norms)):
            dx = x - mean
            if len(x.shape) == 1:
                res = np.exp(-icov.dot(dx).dot(dx) / 2) / norm
            else:
                res = np.exp(-np.einsum('ik,km,im->i', dx, icov, dx) / 2) / norm
            if not i:
                tot = res * weight
            else:
                tot += res * weight
        return tot

    def pdf_marged(self, index, x, no_limit_marge=False):
        """
        Calculate the 1D marginalized PDF. Only valid if the other (marginalized) parameters have no hard limits.

        :param index: index or name of parameter
        :param x: value to evaluate PDF at
        :param no_limit_marge: if true don't raise an error if mixture has limits
        :return: marginalized 1D pdf at x
        """
        if isinstance(index, six.string_types): index = self.names.index(index)
        if not no_limit_marge: self.checkNoLimits([index])
        tot = None
        for i, (mean, cov, weight) in enumerate(zip(self.means, self.covs, self.weights)):
            dx = x - mean[index]
            var = cov[index, index]
            res = np.exp(-dx ** 2 / var / 2) / np.sqrt(2 * np.pi * var)
            if not i:
                tot = res * weight
            else:
                tot += res * weight
        return tot

    def density1D(self, index=0, num_points=1024, sigma_max=4, no_limit_marge=False):
        """
        Get 1D marginalized density. Only works if no hard limits in other parameters.

        :param index: parameter name or index
        :param num_points: number of grid points to evaluate PDF
        :param sigma_max: maximum number of standard deviations away from means to include in computed range
        :param no_limit_marge: if true don't raise error if limits on other parameters
        :return: :class:`~.densities.Density1D` instance
        """
        if isinstance(index, six.string_types): index = self.names.index(index)
        if not no_limit_marge: self.checkNoLimits([index])
        mn, mx = self.autoRanges(sigma_max)[index]
        x = np.linspace(mn, mx, num_points)
        like = self.pdf_marged(index, x)
        return Density1D(x, like)

    def density2D(self, params=None, num_points=1024, xmin=None, xmax=None, ymin=None, ymax=None, sigma_max=5):
        """
        Get 2D marginalized density for a pair of parameters.

        :param params: list of two parameter names or indices to use. If already 2D, can be None.
        :param num_points: number of grid points for evaluation
        :param xmin: optional lower value for first parameter
        :param xmax: optional upper value for first parameter
        :param ymin: optional lower value for second parameter
        :param ymax: optional upper value for second parameter
        :param sigma_max: maximum number of standard deviations away from mean to include in calculated range
        :return: :class:`~.densities.Density2D` instance
        """
        if self.dim > 2 or params is not None or not isinstance(self, Mixture2D):
            mixture = self.marginalizedMixture(params=params)
        elif self.dim != 2:
            raise Exception('density2D requires at least two dimensions')
        else:
            mixture = self

        return mixture._density2D(num_points=num_points, xmin=xmin, xmax=xmax, ymin=ymin,
                                  ymax=ymax, sigma_max=sigma_max)

    def _params_to_indices(self, params):
        indices = []
        if params is None: params = self.names
        for p in params:
            if isinstance(p, six.string_types):
                indices.append(self.names.index(p))
            elif hasattr(p, 'name'):
                indices.append(self.names.index(p.name))
            else:
                indices.append(p)
        return indices

    def marginalizedMixture(self, params, label=None, no_limit_marge=False):
        """
        Calculates a reduced mixture model by marginalization over unwanted parameters

        :param params: array of parameter names or indices to retain. If None, will simply return a copy of this mixture.
        :param label: optional label for the marginalized mixture
        :param no_limit_marge: if true don't raise an error if mixture has limits.
        :return: a new marginalized  :class:`MixtureND` instance
        """

        indices = self._params_to_indices(params)
        if not no_limit_marge: self.checkNoLimits(indices)
        indices = np.array(indices)
        if self.names is not None:
            names = [self.names[i] for i in indices]
        else:
            names = None
        if self.lims is not None:
            lims = [self.lims[i] for i in indices]
        else:
            lims = None
        if label is None: label = self.label
        covs = [cov[np.ix_(indices, indices)] for cov in self.covs]
        means = [mean[indices] for mean in self.means]
        if len(indices) == 2:
            tp = Mixture2D
        else:
            tp = MixtureND
        mixture = tp(means, covs, self.weights, lims=lims,
                     names=names, label=label)
        mixture.paramNames.setLabelsAndDerivedFromParamNames(self.paramNames)
        return mixture

    def conditionalMixture(self, fixed_params, fixed_param_values, label=None):
        """
        Returns a reduced conditional mixture model for the distribution when certain parameters are fixed.

        :param fixed_params: list of names or numbers of parameters to fix
        :param fixed_param_values:  list of values for the fixed parameters
        :param label: optional label for the new mixture
        :return: A new :class:`MixtureND` instance with cov_i = Projection(Cov_i^{-1})^{-1} and shifted conditional means
        """

        fixed_params = self._params_to_indices(fixed_params)
        self.checkNoLimits(fixed_params)
        keep_params = [i for i in range(self.dim) if not i in fixed_params]
        if not len(keep_params):
            raise ValueError('conditionalMixture must leave at least one non-fixed parameter')
        new_means = []
        new_covs = []
        new_weights = []
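        # Standard Gaussian conditioning, component by component: the conditional covariance
        # is the inverse of the kept-parameter block of the precision matrix, and the
        # conditional mean is shifted via the kept-fixed block; logw below acts as a
        # -2*log relative weight, down-weighting components whose means sit far from the
        # fixed values (it is exponentiated, shifted by its minimum, after the loop).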
        for mean, cov, invcov, weight in zip(self.means, self.covs, self.invcovs, self.weights):
            deltas = np.asarray(fixed_param_values) - mean[fixed_params]
            new_cov = np.linalg.inv(invcov[np.ix_(keep_params, keep_params)])
            new_mean = mean[keep_params] - new_cov.dot(invcov[np.ix_(keep_params, fixed_params)].dot(deltas))
            if len(self.weights) == 1 and False:
                logw = 0
            else:
                logw = invcov[np.ix_(fixed_params, fixed_params)].dot(deltas).dot(deltas) \
                       + np.log(np.linalg.det(cov[np.ix_(fixed_params, fixed_params)]
                                              - cov[np.ix_(fixed_params, keep_params)].dot(
                    np.linalg.inv(cov[np.ix_(keep_params, keep_params)]).dot(
                        cov[np.ix_(keep_params, fixed_params)]))))
            new_weights.append(logw)
            new_means.append(new_mean)
            new_covs.append(new_cov)

        new_weights = np.exp(-(np.asarray(new_weights) - min(new_weights)) / 2)
        if self.names is not None:
            names = [self.names[i] for i in keep_params]
        else:
            names = None
        mixture = MixtureND(new_means, new_covs, new_weights, names=names, label=label)
        mixture.paramNames.setLabelsAndDerivedFromParamNames(self.paramNames)
        return mixture

    def checkNoLimits(self, keep_params):
        if self.lims is None: return
        for i, lim in enumerate(self.lims):
            if not i in keep_params and (lim[0] is not None or lim[1] is not None):
                raise Exception(
                    'In general can only marginalize analytically if no hard boundary limits: ' + self.label)

    def getUpper(self, name):
        if self.lims is None: return None
        return self.lims[self.names.index(name)][1]

    def getLower(self, name):
        if self.lims is None: return None
        return self.lims[self.names.index(name)][0]
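
And a complementary sketch for conditioning and plotting-oriented output (same assumed import path; parameter values are illustrative):

import numpy as np
from getdist.gaussian_mixtures import MixtureND  # assumed import path

mix = MixtureND(means=[[0, 0, 0], [1, 1, 2]],
                covs=[np.eye(3), np.diag([0.2, 0.5, 1.0])],
                names=['a', 'b', 'c'])
cond = mix.conditionalMixture(['c'], [1.5])  # 2D mixture over a, b with c fixed at 1.5
dens2d = mix.density2D(['a', 'b'])           # marginalized 2D density grid over a, b
mc = mix.MCSamples(10000, logLikes=True)     # MCSamples instance ready for GetDist plotting
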
Example #8
class Chains(WeightedSamples):
    """
    Holds one or more sets of weighted samples, for example a set of MCMC chains.
    Inherits from :class:`~.chains.WeightedSamples`, also adding parameter names and labels

    :ivar paramNames: a :class:`~.paramnames.ParamNames` instance holding the parameter names and labels
    """

    def __init__(self, root=None, jobItem=None, paramNamesFile=None, names=None, labels=None, **kwargs):
        """

        :param root: optional root name for files
        :param jobItem: optional jobItem for parameter grid item
        :param paramNamesFile: optional filename of a .paramnames files that holds parameter names
        :param names: optional list of names for the parameters
        :param labels: optional list of latex labels for the parameters
        :param kwargs: extra options for :class:`~.chains.WeightedSamples`'s constructor

        """
        self.chains = None
        WeightedSamples.__init__(self, **kwargs)
        self.jobItem = jobItem
        self.ignore_lines = float(kwargs.get('ignore_rows', 0))
        self.root = root
        if not paramNamesFile and root:
            mid = ('' if root.endswith("/") else "__")
            if os.path.exists(root + '.paramnames'):
                paramNamesFile = root + '.paramnames'
            elif os.path.exists(root + mid + 'full.yaml'):
                paramNamesFile = root + mid + 'full.yaml'
        self.setParamNames(paramNamesFile or names)
        if labels is not None:
            self.paramNames.setLabels(labels)

    def setParamNames(self, names=None):
        """
        Sets the names of the params.

        :param names: Either a :class:`~.paramnames.ParamNames` object, the name of a .paramnames file to load, a list of name strings,
                      otherwise use default names (param1, param2...).
        """
        self.paramNames = None
        if isinstance(names, ParamNames):
            self.paramNames = names
        elif isinstance(names, six.string_types):
            self.paramNames = ParamNames(names)
        elif names is not None:
            self.paramNames = ParamNames(names=names)
        elif self.samples is not None:
            self.paramNames = ParamNames(default=self.n)
        if self.paramNames:
            self._getParamIndices()
        self.needs_update = True

    def filter(self, where):
        """
        Filter the stored samples to keep only samples matching filter

        :param where: list of sample indices to keep, or boolean array filter (e.g. x>5 to keep only samples where x>5)
        """

        if self.chains is None:
            if hasattr(self, 'chain_offsets'):
                # must update chain_offsets to be able to correctly split back into separate filtered chains if needed
                lens = [0]
                for off1, off2 in zip(self.chain_offsets[:-1], self.chain_offsets[1:]):
                    lens.append(np.count_nonzero(where[off1:off2]))
                self.chain_offsets = np.cumsum(np.array(lens))
            super(Chains, self).filter(where)
        else:
            raise ValueError('chains are separated, makeSingle first or call filter on individual chains')

    def getParamNames(self):
        """
        Get :class:`~.paramnames.ParamNames` object with names for the parameters

        :return: :class:`~.paramnames.ParamNames` object giving parameter names and labels
        """
        return self.paramNames

    def _getParamIndices(self):
        """
        Gets the indices of the params.

        :return: A dict mapping the param name to the parameter index.
        """
        if self.samples is not None and len(self.paramNames.names) != self.n:
            raise WeightedSampleError("paramNames size does not match number of parameters in samples")
        index = dict()
        for i, name in enumerate(self.paramNames.names):
            index[name.name] = i
        self.index = index
        return self.index

    def setParams(self, obj):
        """
        Adds array variables obj.name1, obj.name2 etc, where
        obj.name1 is the vector of samples with name 'name1'

        if a parameter name is of the form aa.bb.cc, it makes subobjects so you can reference obj.aa.bb.cc

        :param obj: The object instance to add the parameter vectors variables
        :return: The obj after alterations.
        """
        for i, name in enumerate(self.paramNames.names):
            path = name.name.split('.')
            ob = obj
            for p in path[:-1]:
                if not hasattr(ob, p):
                    setattr(ob, p, ParSamples())
                ob = getattr(ob, p)
            setattr(ob, path[-1], self.samples[:, i])
        return obj

    def getParams(self):
        """
        Creates a :class:`~.chains.ParSamples` object, with variables giving vectors for all the parameters,
        for example samples.getParams().name1 would be the vector of samples with name 'name1'

        :return: A :class:`~.chains.ParSamples` object containing all the parameter vectors, with attributes given by the parameter names
        """
        pars = ParSamples()
        self.setParams(pars)
        return pars

    def getParamSampleDict(self, ix):
        """
        Returns a dictionary of parameter values for sample number ix
        """
        from collections import OrderedDict
        res = OrderedDict()
        for i, name in enumerate(self.paramNames.names):
            res[name.name] = self.samples[ix, i]
        res['weight'] = self.weights[ix]
        res['loglike'] = self.loglikes[ix]
        return res

    def _makeParamvec(self, par):
        if self.needs_update: self.updateBaseStatistics()
        if isinstance(par, ParamInfo): par = par.name
        if isinstance(par, six.string_types):
            return self.samples[:, self.index[par]]
        return WeightedSamples._makeParamvec(self, par)

    def updateChainBaseStatistics(self):
        # old name, use updateBaseStatistics
        return self.updateBaseStatistics()

    def updateBaseStatistics(self):
        """
        Updates basic computed statistics for this chain, e.g. after any changes to the samples or weights

        :return: self after updating statistics.
        """
        self.getVars()
        self.mean_mult = self.norm / self.numrows
        self.max_mult = np.max(self.weights)
        self._getParamIndices()
        self.needs_update = False
        return self

    def addDerived(self, paramVec, name, **kwargs):
        """
        Adds a new parameter

        :param paramVec: The vector of parameter values to add.
        :param name: The name for the new parameter
        :param kwargs: arguments for paramnames' :func:`~.paramnames.ParamList.addDerived`
        :return: The added parameter's :class:`~.paramnames.ParamInfo` object
        """
        if self.paramNames.parWithName(name):
            raise ValueError('Parameter with name %s already exists' % name)
        self.changeSamples(np.c_[self.samples, paramVec])
        return self.paramNames.addDerived(name, **kwargs)

    def loadChains(self, root, files, ignore_lines=None):
        """
        Loads chains from files.

        :param root: Root name
        :param files: list of file names
        :param ignore_lines: Number of lines at the start of each file to ignore; use None to not skip any
        :return: True if loaded successfully, False if none loaded
        """
        self.chains = []
        self.samples = None
        self.weights = None
        self.loglikes = None
        self.name_tag = self.name_tag or os.path.basename(root)
        for fname in files:
            if print_load_details: print(fname)
            self.chains.append(
                WeightedSamples(fname, ignore_lines or self.ignore_lines, min_weight_ratio=self.min_weight_ratio))
        if len(self.chains) == 0:
            raise WeightedSampleError('loadChains - no chains found for ' + root)
        if self.paramNames is None:
            self.paramNames = ParamNames(default=self.chains[0].n)
        self._weightsChanged()
        return len(self.chains) > 0

    def getGelmanRubinEigenvalues(self, nparam=None, chainlist=None):
        """
        Assess convergence using var(mean)/mean(var) in the orthogonalized parameters
        c.f. Brooks and Gelman 1997.

        :param nparam: The number of parameters (starting at first), by default uses all of them
        :param chainlist: list of :class:`WeightedSamples`, the samples to use. Defaults to all the separate chains in this instance.
        :return: array of  var(mean)/mean(var) for orthogonalized parameters
        """
        if chainlist is None:
            chainlist = self.getSeparateChains()
        nparam = nparam or self.paramNames.numNonDerived()
        meanscov = np.zeros((nparam, nparam))
        means = self.getMeans()[:nparam]
        meancov = np.zeros(meanscov.shape)
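        # meanscov accumulates the between-chain covariance of the chain means;
        # meancov accumulates the average within-chain covariance. The eigenvalues
        # returned below are those of meanscov in a basis that whitens meancov,
        # i.e. var(mean)/mean(var) along orthogonalized directions.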
        for chain in chainlist:
            diff = chain.getMeans()[:nparam] - means
            meanscov += np.outer(diff, diff)
            meancov += chain.getCov(nparam)
        meanscov /= (len(chainlist) - 1)
        meancov /= len(chainlist)
        w, U = np.linalg.eigh(meancov)
        if np.min(w) > 0:
            U /= np.sqrt(w)
            D = np.linalg.eigvalsh(np.dot(U.T, meanscov).dot(U))
            return D
        else:
            return None

    def getGelmanRubin(self, nparam=None, chainlist=None):
        """
        Assess the convergence using the maximum var(mean)/mean(var) of orthogonalized parameters
        c.f. Brooks and Gelman 1997.

        :param nparam: The number of parameters, by default uses all
        :param chainlist: list of :class:`WeightedSamples`, the samples to use. Defaults to all the separate chains in this instance.
        :return: The worst var(mean)/mean(var) for orthogonalized parameters. Should be <<1 for good convergence.
        """
        return np.max(self.getGelmanRubinEigenvalues(nparam, chainlist))

    def makeSingle(self):
        """
        Combines separate chains into one samples array, so self.samples has all the samples
        and this instance can then be used as a general :class:`~.chains.WeightedSamples` instance.

        :return: self
        """
        self.chain_offsets = np.cumsum(np.array([0] + [chain.samples.shape[0] for chain in self.chains]))
        weights = np.hstack([chain.weights for chain in self.chains])
        loglikes = np.hstack([chain.loglikes for chain in self.chains])
        self.setSamples(np.vstack([chain.samples for chain in self.chains]), weights, loglikes, min_weight_ratio=-1)
        self.chains = None
        self.needs_update = True
        return self

    def getSeparateChains(self):
        """
        Gets a list of samples for separate chains.
        If the chains have already been combined, uses the stored sample offsets to reconstruct the array (generally no array copying)

        :return: The list of :class:`~.chains.WeightedSamples` for each chain.
        """
        if self.chains is not None:
            return self.chains
        chainlist = []
        for off1, off2 in zip(self.chain_offsets[:-1], self.chain_offsets[1:]):
            chainlist.append(WeightedSamples(samples=self.samples[off1:off2], weights=self.weights[off1:off2],
                                             loglikes=self.loglikes[off1:off2]))
        return chainlist

    def removeBurnFraction(self, ignore_frac):
        """
        Remove a fraction of the samples as burn in

        :param ignore_frac: fraction of sample points to remove from the start of the samples, or each chain if not combined
        """
        if self.samples is not None:
            self.removeBurn(ignore_frac)
            self.chains = None
            self.needs_update = True
        else:
            for chain in self.chains:
                chain.removeBurn(ignore_frac)

    def deleteFixedParams(self):
        """
        Delete parameters that are fixed (the same value in all samples)
        """
        if self.samples is not None:
            fixed = WeightedSamples.deleteFixedParams(self)
            self.chains = None
        else:
            fixed = []
            chain = self.chains[0]
            for i in range(chain.n):
                if np.all(chain.samples[:, i] == chain.samples[0, i]): fixed.append(i)
            for chain in self.chains:
                chain.changeSamples(np.delete(chain.samples, fixed, 1))
        self.paramNames.deleteIndices(fixed)
        self._getParamIndices()

    def saveAsText(self, root, chain_index=None, make_dirs=False):
        """
        Saves the samples as text files, including parameter names as .paramnames file.

        :param root: The root name to use
        :param chain_index: Optional index to be used for the filename, zero based, e.g. for saving one of multiple chains
        :param make_dirs: True if this should (recursively) create the directory if it doesn't exist
        """
        super(Chains, self).saveAsText(root, chain_index, make_dirs)
        if not chain_index: self.paramNames.saveAsText(root + '.paramnames')

    def savePickle(self, filename):
        """
        Save the current object to a file in pickle format

        :param filename: The file to write to
        """

        with open(filename, 'wb') as output:
            pickle.dump(self, output, pickle.HIGHEST_PROTOCOL)
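
A hedged sketch of a typical workflow with this class (root and file names are illustrative; in GetDist one normally works with the MCSamples subclass, which builds on these methods):

from getdist.chains import Chains  # assumed import path

chains = Chains(root='chains/run1', names=['x', 'y'])
chains.loadChains('chains/run1', ['chains/run1_1.txt', 'chains/run1_2.txt'])
chains.makeSingle()          # merge the separate chains into one sample array
R = chains.getGelmanRubin()  # worst var(mean)/mean(var); should be << 1 when converged
pars = chains.getParams()    # pars.x, pars.y are the per-sample parameter vectors
chains.saveAsText('chains/run1_combined')
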
Example #9
class Chains(WeightedSamples):
    """
    Holds one or more sets of weighted samples, for example a set of MCMC chains. 
    Inherits from :class:`~.chains.WeightedSamples`, also adding parameter names and labels
    
    :ivar paramNames: a :class:`~.paramnames.ParamNames` instance holding the parameter names and labels
    """

    def __init__(self, root=None, jobItem=None, paramNamesFile=None, names=None, labels=None, **kwargs):
        """

        :param root: optional root name for files
        :param jobItem: optional jobItem for parameter grid item
        :param paramNamesFile: optional filename of a .paramnames files that holds parameter names
        :param names: optional list of names for the parameters
        :param labels: optional list of latex labels for the parameters
        :param kwargs: extra options for :class:`~.chains.WeightedSamples`'s constructor

        """
        WeightedSamples.__init__(self, **kwargs)
        self.jobItem = jobItem
        self.precision = '%.8e'
        self.ignore_lines = float(kwargs.get('ignore_rows', 0))
        self.root = root
        if not paramNamesFile and root and os.path.exists(root + '.paramnames'):
            paramNamesFile = root + '.paramnames'
        self.needs_update = True
        self.chains = None
        self.setParamNames(paramNamesFile or names)
        if labels is not None:
            self.paramNames.setLabels(labels)

    def setParamNames(self, names=None):
        """
        Sets the names of the params.

        :param names: Either a :class:`~.paramnames.ParamNames` object, the name of a .paramnames file to load, a list of name strings,
                      otherwise use default names (param1, param2...).
        """
        self.paramNames = None
        if isinstance(names, ParamNames):
            self.paramNames = names
        elif isinstance(names, six.string_types):
            self.paramNames = ParamNames(names)
        elif names is not None:
            self.paramNames = ParamNames(names=names)
        elif self.samples is not None:
            self.paramNames = ParamNames(default=self.n)
        if self.paramNames:
            self._getParamIndices()

    def getParamNames(self):
        """
        Get :class:`~.paramnames.ParamNames` object with names for the parameters

        :return: :class:`~.paramnames.ParamNames` object giving parameter names and labels
        """
        return self.paramNames

    def _getParamIndices(self):
        """
        Gets the indices of the params.

        :return: A dict mapping the param name to the parameter index.
        """
        if self.samples is not None and len(self.paramNames.names) != self.n:
            raise WeightedSampleError("paramNames size does not match number of parameters in samples")
        index = dict()
        for i, name in enumerate(self.paramNames.names):
            index[name.name] = i
        self.index = index
        return self.index

    def setParams(self, obj):
        """
        Adds array variables obj.name1, obj.name2 etc, where
        obj.name1 is the vector of samples with name 'name1'
        
        if a parameter name is of the form aa.bb.cc, it makes subobjects so you can reference obj.aa.bb.cc

        :param obj: The object instance to add the parameter vectors variables
        :return: The obj after alterations.
        """
        for i, name in enumerate(self.paramNames.names):
            path = name.name.split('.')
            ob = obj
            for p in path[:-1]:
                if not hasattr(ob, p):
                    setattr(ob, p, ParSamples())
                ob = getattr(ob, p)
            setattr(ob, path[-1], self.samples[:, i])
        return obj

    def getParams(self):
        """
        Creates a :class:`~.chains.ParSamples` object, with variables giving vectors for all the parameters,
        for example samples.getParams().name1 would be the vector of samples with name 'name1'

        :return: A :class:`~.chains.ParSamples` object containing all the parameter vectors, with attributes given by the parameter names
        """
        pars = ParSamples()
        self.setParams(pars)
        return pars

    def _makeParamvec(self, par):
        if self.needs_update: self.updateBaseStatistics()
        if isinstance(par, ParamInfo): par = par.name
        if isinstance(par, six.string_types):
            return self.samples[:, self.index[par]]
        return WeightedSamples._makeParamvec(self, par)

    def updateChainBaseStatistics(self):
        # old name, use updateBaseStatistics
        return self.updateBaseStatistics()

    def updateBaseStatistics(self):
        """
        Updates basic computed statistics for this chain, e.g. after any changes to the samples or weights

        :return: self after updating statistics.
        """
        self.getVars()
        self.mean_mult = self.norm / self.numrows
        self.max_mult = np.max(self.weights)
        self._getParamIndices()
        self.needs_update = False
        return self

    def addDerived(self, paramVec, name, **kwargs):
        """
        Adds a new parameter

        :param paramVec: The vector of parameter values to add.
        :param name: The name for the new parameter
        :param kwargs: arguments for paramnames' :func:`~.paramnames.ParamList.addDerived`
        :return: The added parameter's :class:`~.paramnames.ParamInfo` object
        """
        if self.paramNames.parWithName(name):
            raise ValueError('Parameter with name %s already exists' % name)
        self.changeSamples(np.c_[self.samples, paramVec])
        return self.paramNames.addDerived(name, **kwargs)

    def loadChains(self, root, files, ignore_lines=None):
        """
        Loads chains from files.

        :param root: Root name
        :param files: list of file names
        :param ignore_lines: Number of lines at the start of each file to ignore; use None to not skip any
        :return: True if loaded successfully, False if none loaded
        """
        self.chains = []
        self.samples = None
        self.weights = None
        self.loglikes = None
        self.name_tag = self.name_tag or os.path.basename(root)
        for fname in files:
            if print_load_details: print(fname)
            self.chains.append(WeightedSamples(fname, ignore_lines or self.ignore_lines))
        if len(self.chains) == 0:
            raise WeightedSampleError('loadChains - no chains found for ' + root)
        if self.paramNames is None:
            self.paramNames = ParamNames(default=self.chains[0].n)
        self._weightsChanged()
        return len(self.chains) > 0

    def getGelmanRubinEigenvalues(self, nparam=None, chainlist=None):
        """
        Assess convergence using var(mean)/mean(var) in the orthogonalized parameters
        c.f. Brooks and Gelman 1997.

        :param nparam: The number of parameters (starting at first), by default uses all of them
        :param chainlist: list of :class:`WeightedSamples`, the samples to use. Defaults to all the separate chains in this instance.
        :return: array of  var(mean)/mean(var) for orthogonalized parameters
        """
        if chainlist is None:
            chainlist = self.getSeparateChains()
        nparam = nparam or self.paramNames.numNonDerived()
        meanscov = np.zeros((nparam, nparam))
        means = self.getMeans()[:nparam]
        meancov = np.zeros(meanscov.shape)
        for chain in chainlist:
            diff = chain.getMeans()[:nparam] - means
            meanscov += np.outer(diff, diff)
            meancov += chain.getCov(nparam)
        meanscov /= (len(chainlist) - 1)
        meancov /= len(chainlist)
        w, U = np.linalg.eigh(meancov)
        if np.min(w) > 0:
            U /= np.sqrt(w)
            D = np.linalg.eigvalsh(np.dot(U.T, meanscov).dot(U))
            return D
        else:
            return None

    def getGelmanRubin(self, nparam=None, chainlist=None):
        """
        Assess the convergence using the maximum var(mean)/mean(var) of orthogonalized parameters
        cf. Brooks and Gelman 1997.

        :param nparam: The number of parameters, by default uses all
        :param chainlist: list of :class:`WeightedSamples`, the samples to use. Defaults to all the separate chains in this instance.
        :return: The worst var(mean)/mean(var) for orthogonalized parameters. Should be <<1 for good convergence.
        """
        return np.max(self.getGelmanRubinEigenvalues(nparam, chainlist))

    def makeSingle(self):
        """
        Combines separate chains into one samples array, so self.samples has all the samples
        and this instance can then be used as a general :class:`~.chains.WeightedSamples` instance.
        
        :return: self
        """
        self.chain_offsets = np.cumsum(np.array([0] + [chain.samples.shape[0] for chain in self.chains]))
        weights = np.hstack([chain.weights for chain in self.chains])
        loglikes = np.hstack([chain.loglikes for chain in self.chains])
        self.setSamples(np.vstack([chain.samples for chain in self.chains]), weights, loglikes)
        self.chains = None
        self.needs_update = True
        return self

    def getSeparateChains(self):
        """
        Gets a list of samples for separate chains. 
        If the chains have already been combined, uses the stored sample offsets to reconstruct the array (generally no array copying)

        :return: The list of :class:`~.chains.WeightedSamples` for each chain. 
        """
        if self.chains is not None:
            return self.chains
        chainlist = []
        for off1, off2 in zip(self.chain_offsets[:-1], self.chain_offsets[1:]):
            chainlist.append(WeightedSamples(samples=self.samples[off1:off2], weights=self.weights[off1:off2],
                                             loglikes=self.loglikes[off1:off2]))
        return chainlist

    def removeBurnFraction(self, ignore_frac):
        """
        Remove a fraction of the samples as burn in

        :param ignore_frac: fraction of sample points to remove from the start of the samples, or each chain if not combined
        """
        if self.samples is not None:
            self.removeBurn(ignore_frac)
            self.chains = None
            self.needs_update = True
        else:
            for chain in self.chains:
                chain.removeBurn(ignore_frac)

    def deleteFixedParams(self):
        """
        Delete parameters that are fixed (the same value in all samples)
        """
        if self.samples is not None:
            fixed = WeightedSamples.deleteFixedParams(self)
            self.chains = None
        else:
            fixed = []
            chain = self.chains[0]
            for i in range(chain.n):
                if np.all(chain.samples[:, i] == chain.samples[0, i]): fixed.append(i)
            for chain in self.chains:
                chain.changeSamples(np.delete(chain.samples, fixed, 1))
        self.paramNames.deleteIndices(fixed)
        self._getParamIndices()

    def saveAsText(self, root, chain_index=None, make_dirs=False):
        """
        Saves the samples as text files, including parameter names as .paramnames file. 

        :param root: The root name to use
        :param chain_index: Optional index to be used for the filename, zero based, e.g. for saving one of multiple chains
        :param make_dirs: True if this should (recursively) create the directory if it doesn't exist
        """
        super(Chains, self).saveAsText(root, chain_index, make_dirs)
        if not chain_index: self.paramNames.saveAsText(root + '.paramnames')

    def savePickle(self, filename):
        """
        Save the current object to a file in pickle format

        :param filename: The file to write to
        """

        with open(filename, 'wb') as output:
            pickle.dump(self, output, pickle.HIGHEST_PROTOCOL)
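
A minimal usage sketch of the pickle round trip above (synthetic data and a made-up file name; assumes this is the Chains class from getdist.chains and that the getdist package is installed):

import pickle
import numpy as np
from getdist.chains import Chains  # assumed import path for the class documented here

# Build a small chain from synthetic samples with hypothetical parameter names.
samples = Chains(samples=np.random.randn(500, 2), names=['x', 'y'])
samples.savePickle('chains_demo.pkl')      # hypothetical file name; writes the whole object
with open('chains_demo.pkl', 'rb') as f:   # restore it with the standard pickle module
    restored = pickle.load(f)
print(restored.getMeans())                 # the restored object behaves like the original
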
Example No. 10
class Chains(WeightedSamples):
    """
    Holds one or more sets of weighted samples, for example a set of MCMC chains.
    Inherits from :class:`~.chains.WeightedSamples`, also adding parameter names and labels

    :ivar paramNames: a :class:`~.paramnames.ParamNames` instance holding the parameter names and labels
    """

    def __init__(self, root=None, jobItem=None, paramNamesFile=None, names=None, labels=None, renames=None,
                 sampler=None, **kwargs):
        """

        :param root: optional root name for files
        :param jobItem: optional jobItem for parameter grid item
        :param paramNamesFile: optional filename of a .paramnames file that holds parameter names
        :param names: optional list of names for the parameters
        :param labels: optional list of latex labels for the parameters
        :param renames: optional dictionary of parameter aliases
        :param sampler: string describing the type of samples (default: "mcmc"); if "nested" or "uncorrelated"
              the effective number of samples is calculated using the uncorrelated approximation
        :param kwargs: extra options for :class:`~.chains.WeightedSamples`'s constructor

        """
        self.chains = None
        WeightedSamples.__init__(self, **kwargs)
        self.jobItem = jobItem
        self.ignore_lines = float(kwargs.get('ignore_rows', 0))
        self.root = root
        if not paramNamesFile and root:
            mid = ('' if root.endswith("/") else "__")
            if os.path.exists(root + '.paramnames'):
                paramNamesFile = root + '.paramnames'
            elif os.path.exists(root + mid + 'full.yaml'):
                paramNamesFile = root + mid + 'full.yaml'
        self.setParamNames(paramNamesFile or names)
        if labels is not None:
            self.paramNames.setLabels(labels)
        if renames is not None:
            self.updateRenames(renames)
        # Sampler that generated the chain -- assume "mcmc"
        if isinstance(sampler, six.string_types):
            if sampler.lower() not in ["mcmc", "nested", "uncorrelated"]:
                raise ValueError("Unknown sampler type %s" % sampler)
            self.sampler = sampler.lower()
        elif isinstance(paramNamesFile, six.string_types) and paramNamesFile.endswith("yaml"):
            from getdist.yaml_format_tools import get_sampler_type
            self.sampler = get_sampler_type(paramNamesFile)
        else:
            self.sampler = "mcmc"

    def setParamNames(self, names=None):
        """
        Sets the names of the params.

        :param names: Either a :class:`~.paramnames.ParamNames` object, the name of a .paramnames file to load, or a list of name strings;
                      otherwise default names (param1, param2, ...) are used.
        """
        self.paramNames = None
        if isinstance(names, ParamNames):
            self.paramNames = names
        elif isinstance(names, six.string_types):
            self.paramNames = ParamNames(names)
        elif names is not None:
            self.paramNames = ParamNames(names=names)
        elif self.samples is not None:
            self.paramNames = ParamNames(default=self.n)
        if self.paramNames:
            self._getParamIndices()
        self.needs_update = True

    def filter(self, where):
        """
        Filter the stored samples, keeping only those that match the given filter

        :param where: list of sample indices to keep, or boolean array filter (e.g. x>5 to keep only samples where x>5)
        """

        if self.chains is None:
            if hasattr(self, 'chain_offsets'):
                # must update chain_offsets to be able to correctly split back into separate filtered chains if needed
                lens = [0]
                for off1, off2 in zip(self.chain_offsets[:-1], self.chain_offsets[1:]):
                    lens.append(np.count_nonzero(where[off1:off2]))
                self.chain_offsets = np.cumsum(np.array(lens))
            super(Chains, self).filter(where)
        else:
            raise ValueError('chains are separated, makeSingle first or call filter on individual chains')
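
    # Usage sketch (added for illustration, not part of the original source): hypothetical
    # parameter name 'x', assuming the chains were already combined with makeSingle():
    #   p = samples.getParams()
    #   samples.filter(p.x > 0)              # keep only rows where parameter 'x' is positive
    #   parts = samples.getSeparateChains()  # still valid: chain_offsets were updated above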

    def getParamNames(self):
        """
        Get :class:`~.paramnames.ParamNames` object with names for the parameters

        :return: :class:`~.paramnames.ParamNames` object giving parameter names and labels
        """
        return self.paramNames

    def _getParamIndices(self):
        """
        Gets the indices of the params.

        :return: A dict mapping the param name to the parameter index.
        """
        if self.samples is not None and len(self.paramNames.names) != self.n:
            raise WeightedSampleError("paramNames size does not match number of parameters in samples")
        index = dict()
        for i, name in enumerate(self.paramNames.names):
            index[name.name] = i
        self.index = index
        return self.index

    def getRenames(self):
        """
        Gets a dictionary of the renames (aliases) known to each parameter.
        """
        return self.paramNames.getRenames()

    def updateRenames(self, renames):
        """
        Updates the renames known to each parameter with the given dictionary of renames.
        """
        self.paramNames.updateRenames(renames)

    def setParams(self, obj):
        """
        Adds array variables obj.name1, obj.name2 etc., where
        obj.name1 is the vector of samples with name 'name1'.

        If a parameter name is of the form aa.bb.cc, sub-objects are created so you can reference obj.aa.bb.cc.

        :param obj: The object instance to add the parameter vectors variables
        :return: The obj after alterations.
        """
        for i, name in enumerate(self.paramNames.names):
            path = name.name.split('.')
            ob = obj
            for p in path[:-1]:
                if not hasattr(ob, p):
                    setattr(ob, p, ParSamples())
                ob = getattr(ob, p)
            setattr(ob, path[-1], self.samples[:, i])
        return obj
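
    # Illustration (not part of the original source): with hypothetical parameter names
    # 'cosmo.H0' and 'cosmo.omegam', the dotted names above become nested attributes:
    #   p = samples.getParams()
    #   p.cosmo.H0       # vector of samples for 'cosmo.H0'
    #   p.cosmo.omegam   # vector of samples for 'cosmo.omegam'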

    def getParams(self):
        """
        Creates a :class:`~.chains.ParSamples` object, with variables giving vectors for all the parameters,
        for example samples.getParams().name1 would be the vector of samples with name 'name1'

        :return: A :class:`~.chains.ParSamples` object containing all the parameter vectors, with attributes given by the parameter names
        """
        pars = ParSamples()
        self.setParams(pars)
        return pars

    def getParamSampleDict(self, ix):
        """
        Returns a dictionary of parameter values for sample number ix
        """
        from collections import OrderedDict
        res = OrderedDict()
        for i, name in enumerate(self.paramNames.names):
            res[name.name] = self.samples[ix, i]
        res['weight'] = self.weights[ix]
        res['loglike'] = self.loglikes[ix]
        return res

    def _makeParamvec(self, par):
        if self.needs_update: self.updateBaseStatistics()
        if isinstance(par, ParamInfo): par = par.name
        if isinstance(par, six.string_types):
            return self.samples[:, self.index[par]]
        return WeightedSamples._makeParamvec(self, par)

    def updateChainBaseStatistics(self):
        # old name, use updateBaseStatistics
        return self.updateBaseStatistics()

    def updateBaseStatistics(self):
        """
        Updates basic computed statistics for this chain, e.g. after any changes to the samples or weights

        :return: self after updating statistics.
        """
        self.getVars()
        self.mean_mult = self.norm / self.numrows
        self.max_mult = np.max(self.weights)
        self._getParamIndices()
        self.needs_update = False
        return self

    def addDerived(self, paramVec, name, **kwargs):
        """
        Adds a new parameter

        :param paramVec: The vector of parameter values to add.
        :param name: The name for the new parameter
        :param kwargs: keyword arguments for :func:`~.paramnames.ParamList.addDerived`
        :return: The added parameter's :class:`~.paramnames.ParamInfo` object
        """
        if self.paramNames.parWithName(name):
            raise ValueError('Parameter with name %s already exists' % name)
        self.changeSamples(np.c_[self.samples, paramVec])
        return self.paramNames.addDerived(name, **kwargs)
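
    # Usage sketch (not part of the original source; hypothetical names):
    #   p = samples.getParams()
    #   samples.addDerived(p.x * p.y, name='xy', label='xy')
    #   samples.updateBaseStatistics()   # refresh indices and statistics for the new column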

    def loadChains(self, root, files_or_samples, weights=None, loglikes=None,
                   ignore_lines=None):
        """
        Loads chains from files.

        :param root: Root name
        :param files_or_samples: list of file names or list of arrays of samples, or single array of samples
        :param weights: if loading from arrays of samples, corresponding list of arrays of weights
        :param loglikes: if loading from arrays of samples, corresponding list of arrays of -2 log(likelihood)
        :param ignore_lines: number of lines to ignore at the start of each file; if None, the instance's ignore_lines setting is used
        :return: True if loaded successfully, False if none loaded
        """
        self.chains = []
        self.samples = None
        self.weights = None
        self.loglikes = None
        if ignore_lines is None: ignore_lines = self.ignore_lines
        WSkwargs = {"ignore_rows": ignore_lines,
                    "min_weight_ratio": self.min_weight_ratio}
        if isinstance(files_or_samples, six.string_types) or isinstance(files_or_samples[0], six.string_types):
            # From files
            if weights is not None or loglikes is not None:
                raise ValueError('weights and loglikes are not needed when reading from files')
            if isinstance(files_or_samples, six.string_types): files_or_samples = [files_or_samples]
            self.name_tag = self.name_tag or os.path.basename(root)
            for fname in files_or_samples:
                if print_load_details: print(fname)
                try:
                    self.chains.append(WeightedSamples(fname, **WSkwargs))
                except WeightedSampleError:
                    if print_load_details:
                        print('Ignored file %s (likely empty)' % fname)
            nchains = len(self.chains)
            if not nchains:
                raise WeightedSampleError('loadChains - no chains found for ' + root)
        else:
            # From arrays
            def array_dimension(a):
                # Dimension for numpy or list/tuple arrays, not very safe (does not work if string elements)
                d = 0
                while True:
                    try:
                        a = a[0]
                        d += 1
                    except Exception:  # indexing failed: reached non-indexable elements
                        return d

            dim = array_dimension(files_or_samples)
            if dim in [1, 2]:
                self.chains = None
                self.setSamples(slice_or_none(files_or_samples, ignore_lines),
                                slice_or_none(weights, ignore_lines),
                                slice_or_none(loglikes, ignore_lines), self.min_weight_ratio)
                if self.paramNames is None:
                    self.paramNames = ParamNames(default=self.n)
                nchains = 1
            elif dim == 3:
                for i, samples_i in enumerate(files_or_samples):
                    self.chains.append(WeightedSamples(
                        samples=samples_i, loglikes=None if loglikes is None else np.atleast_2d(loglikes)[i],
                        weights=None if weights is None else np.atleast_2d(weights)[i], **WSkwargs))
                if self.paramNames is None:
                    self.paramNames = ParamNames(default=self.chains[0].n)
                nchains = len(self.chains)
            else:
                raise ValueError('samples or files must be array of samples, or a list of arrays or files')
        self._weightsChanged()
        return nchains > 0

    def getGelmanRubinEigenvalues(self, nparam=None, chainlist=None):
        """
        Assess convergence using var(mean)/mean(var) in the orthogonalized parameters
        cf. Brooks and Gelman 1997.

        :param nparam: The number of parameters (starting at first), by default uses all of them
        :param chainlist: list of :class:`~.chains.WeightedSamples`, the samples to use. Defaults to all the separate chains in this instance.
        :return: array of var(mean)/mean(var) for the orthogonalized parameters
        """
        if chainlist is None:
            chainlist = self.getSeparateChains()
        nparam = nparam or self.paramNames.numNonDerived()
        meanscov = np.zeros((nparam, nparam))
        means = self.getMeans()[:nparam]
        meancov = np.zeros(meanscov.shape)
        for chain in chainlist:
            diff = chain.getMeans()[:nparam] - means
            meanscov += np.outer(diff, diff)
            meancov += chain.getCov(nparam)
        meanscov /= (len(chainlist) - 1)
        meancov /= len(chainlist)
        w, U = np.linalg.eigh(meancov)
        if np.min(w) > 0:
            U /= np.sqrt(w)
            D = np.linalg.eigvalsh(np.dot(U.T, meanscov).dot(U))
            return D
        else:
            return None

    def getGelmanRubin(self, nparam=None, chainlist=None):
        """
        Assess the convergence using the maximum var(mean)/mean(var) of orthogonalized parameters
        cf. Brooks and Gelman 1997.

        :param nparam: The number of parameters, by default uses all
        :param chainlist: list of :class:`~.chains.WeightedSamples`, the samples to use. Defaults to all the separate chains in this instance.
        :return: The worst var(mean)/mean(var) for orthogonalized parameters. Should be <<1 for good convergence.
        """
        return np.max(self.getGelmanRubinEigenvalues(nparam, chainlist))
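
    # Usage sketch (not part of the original source): a quick convergence check after
    # makeSingle(), using a hypothetical tolerance:
    #   if samples.getGelmanRubin() > 0.05:
    #       print('chains look poorly converged')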

    def makeSingle(self):
        """
        Combines separate chains into one samples array, so self.samples has all the samples
        and this instance can then be used as a general :class:`~.chains.WeightedSamples` instance.

        :return: self
        """
        self.chain_offsets = np.cumsum(np.array([0] + [chain.samples.shape[0] for chain in self.chains]))
        weights = None if self.chains[0].weights is None else np.hstack([chain.weights for chain in self.chains])
        loglikes = None if self.chains[0].loglikes is None else np.hstack([chain.loglikes for chain in self.chains])
        self.setSamples(np.vstack([chain.samples for chain in self.chains]), weights, loglikes, min_weight_ratio=-1)
        self.chains = None
        self.needs_update = True
        return self

    def getSeparateChains(self):
        """
        Gets a list of samples for separate chains.
        If the chains have already been combined, uses the stored sample offsets to reconstruct the array (generally no array copying)

        :return: The list of :class:`~.chains.WeightedSamples` for each chain.
        """
        if self.chains is not None:
            return self.chains
        chainlist = []
        for off1, off2 in zip(self.chain_offsets[:-1], self.chain_offsets[1:]):
            chainlist.append(WeightedSamples(samples=self.samples[off1:off2], weights=self.weights[off1:off2],
                                             loglikes=self.loglikes[off1:off2]))
        return chainlist

    def removeBurnFraction(self, ignore_frac):
        """
        Remove a fraction of the samples as burn in

        :param ignore_frac: fraction of sample points to remove from the start of the samples, or each chain if not combined
        """
        if self.samples is not None:
            self.removeBurn(ignore_frac)
            self.chains = None
            self.needs_update = True
        else:
            for chain in self.chains:
                chain.removeBurn(ignore_frac)

    def deleteFixedParams(self):
        """
        Delete parameters that are fixed (the same value in all samples)
        """
        if self.samples is not None:
            fixed = WeightedSamples.deleteFixedParams(self)
            self.chains = None
        else:
            fixed = []
            chain = self.chains[0]
            for i in range(chain.n):
                if np.all(chain.samples[:, i] == chain.samples[0, i]): fixed.append(i)
            for chain in self.chains:
                chain.changeSamples(np.delete(chain.samples, fixed, 1))
        self.paramNames.deleteIndices(fixed)
        self._getParamIndices()

    def saveAsText(self, root, chain_index=None, make_dirs=False):
        """
        Saves the samples as text files, including parameter names as .paramnames file.

        :param root: The root name to use
        :param chain_index: Optional index to be used for the filename, zero based, e.g. for saving one of multiple chains
        :param make_dirs: True if this should (recursively) create the directory if it doesn't exist
        """
        super(Chains, self).saveAsText(root, chain_index, make_dirs)
        if not chain_index: self.paramNames.saveAsText(root + '.paramnames')

    def savePickle(self, filename):
        """
        Save the current object to a file in pickle format

        :param filename: The file to write to
        """

        with open(filename, 'wb') as output:
            pickle.dump(self, output, pickle.HIGHEST_PROTOCOL)
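
An end-to-end sketch of the class above (synthetic data and hypothetical parameter names; assumes the getdist package providing getdist.chains is installed):

import numpy as np
from getdist.chains import Chains  # the class shown in Example No. 10

# Two synthetic "chains" over two parameters; a list of 2-D arrays takes the dim == 3
# branch of loadChains, creating one WeightedSamples instance per array.
np.random.seed(0)
chain_arrays = [np.random.randn(1000, 2) + [0.0, 1.0] for _ in range(2)]
loglikes = [0.5 * np.sum(a ** 2, axis=1) for a in chain_arrays]  # synthetic -log(like) values

chains = Chains(names=['x', 'y'], labels=['x', 'y'])
chains.loadChains(None, chain_arrays, loglikes=loglikes)  # root is unused when loading from arrays
chains.makeSingle()                     # concatenate the separate chains into one samples array

print(chains.getGelmanRubin())          # worst var(mean)/mean(var); should be << 1 when converged
p = chains.getParams()
print(p.x.shape, p.y.mean())            # per-parameter sample vectors as attributes
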
Example No. 11
    def loadChains(self, root, files_or_samples, weights=None, loglikes=None,
                   ignore_lines=None):
        """
        Loads chains from files.

        :param root: Root name
        :param files_or_samples: list of file names or list of arrays of samples, or single array of samples
        :param weights: if loading from arrays of samples, corresponding list of arrays of weights
        :param loglikes: if loading from arrays of samples, corresponding list of arrays of -2 log(likelihood)
        :param ignore_lines: number of lines to ignore at the start of each file; if None, the instance's ignore_lines setting is used
        :return: True if loaded successfully, False if none loaded
        """
        self.chains = []
        self.samples = None
        self.weights = None
        self.loglikes = None
        if ignore_lines is None: ignore_lines = self.ignore_lines
        WSkwargs = {"ignore_rows": ignore_lines,
                    "min_weight_ratio": self.min_weight_ratio}
        if isinstance(files_or_samples, six.string_types) or isinstance(files_or_samples[0], six.string_types):
            # From files
            if weights is not None or loglikes is not None:
                raise ValueError('weights and loglikes are not needed when reading from files')
            if isinstance(files_or_samples, six.string_types): files_or_samples = [files_or_samples]
            self.name_tag = self.name_tag or os.path.basename(root)
            for fname in files_or_samples:
                if print_load_details: print(fname)
                try:
                    self.chains.append(WeightedSamples(fname, **WSkwargs))
                except WeightedSampleError:
                    if print_load_details:
                        print('Ignored file %s (likely empty)' % fname)
            nchains = len(self.chains)
            if not nchains:
                raise WeightedSampleError('loadChains - no chains found for ' + root)
        else:
            # From arrays
            def array_dimension(a):
                # Dimension for numpy or list/tuple arrays, not very safe (does not work if string elements)
                d = 0
                while True:
                    try:
                        a = a[0]
                        d += 1
                    except Exception:  # indexing failed: reached non-indexable elements
                        return d

            dim = array_dimension(files_or_samples)
            if dim in [1, 2]:
                self.chains = None
                self.setSamples(slice_or_none(files_or_samples, ignore_lines),
                                slice_or_none(weights, ignore_lines),
                                slice_or_none(loglikes, ignore_lines), self.min_weight_ratio)
                if self.paramNames is None:
                    self.paramNames = ParamNames(default=self.n)
                nchains = 1
            elif dim == 3:
                for i, samples_i in enumerate(files_or_samples):
                    self.chains.append(WeightedSamples(
                        samples=samples_i, loglikes=None if loglikes is None else np.atleast_2d(loglikes)[i],
                        weights=None if weights is None else np.atleast_2d(weights)[i], **WSkwargs))
                if self.paramNames is None:
                    self.paramNames = ParamNames(default=self.chains[0].n)
                nchains = len(self.chains)
            else:
                raise ValueError('samples or files must be array of samples, or a list of arrays or files')
        self._weightsChanged()
        return nchains > 0
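
For a single 2-D array, the same method takes the dim in [1, 2] branch instead, storing the samples directly rather than as separate chains. A minimal sketch under the same assumptions (synthetic data, getdist installed):

import numpy as np
from getdist.chains import Chains  # assumed import path

samples = np.random.randn(2000, 3)              # one 2-D array of samples
weights = np.ones(len(samples))                 # optional per-sample weights
c = Chains(names=['a', 'b', 'c'])
c.loadChains(None, samples, weights=weights, ignore_lines=0)  # stored via setSamples; no separate chains
print(c.samples.shape, c.getMeans())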