def __init__(self, means, covs, weights=None, lims=None, names=None, label='', labels=None):
    """
    Set up a Gaussian mixture and pre-compute per-component and total moments.

    :param means: list of means for each Gaussian in the mixture
    :param covs: list of covariances for the Gaussians in the mixture
    :param weights: optional weight for each component (defaults to equal weight)
    :param lims: optional list of hard limits for each parameter, [[x1min,x1max], [x2min,x2max]];
                 use None for no limit
    :param names: list of names (strings) for each parameter. If not set, set to "param1", "param2"...
    :param label: name for labelling this mixture
    :param labels: list of latex labels for each parameter. If not set, defaults to p_{1}, p_{2}...
    :raises ValueError: if the weights do not sum to a positive number
    """
    self.means = np.asarray(means)
    self.dim = self.means.shape[1]
    self.covs = [np.array(cov) for cov in covs]
    self.invcovs = [np.linalg.inv(cov) for cov in self.covs]
    if weights is None:
        weights = [1. / len(means)] * len(means)
    self.weights = np.array(weights, dtype=np.float64)
    weight_sum = np.sum(self.weights)
    if weight_sum <= 0:
        raise ValueError('Weight <= 0 in MixtureND')
    self.weights /= weight_sum
    # Normalization of each Gaussian component: (2 pi)^(dim/2) sqrt(det(cov))
    self.norms = (2 * np.pi) ** (0.5 * self.dim) * np.array(
        [np.sqrt(np.linalg.det(cov)) for cov in self.covs])
    self.lims = lims
    self.paramNames = ParamNames(names=names, default=self.dim, labels=labels)
    self.names = self.paramNames.list()
    self.label = label
    self.total_mean = np.atleast_1d(np.dot(self.weights, self.means))
    self.total_cov = np.zeros((self.dim, self.dim))
    # Total covariance = sum_i w_i (cov_i + (mean_i - total_mean)(mean_i - total_mean)^T).
    # The full total_mean vector must be used for every component; zipping it with the
    # component lists would pair each component with a single scalar coordinate and
    # truncate the sum to min(n_components, dim) terms.
    for mean, cov, weight in zip(self.means, self.covs, self.weights):
        offset = mean - self.total_mean
        self.total_cov += weight * (cov + np.outer(offset, offset))
def loadChains(self, root, files, ignore_lines=None):
    """
    Loads chains from files.

    :param root: Root name
    :param files: list of file names
    :param ignore_lines: Number of lines at the start of each file to ignore; if None the
                         instance setting (self.ignore_lines) is used
    :return: True if chains were loaded
    :raises WeightedSampleError: if no chains were loaded
    """
    self.chains = []
    self.samples = None
    self.weights = None
    self.loglikes = None
    # Fall back to the instance setting only when nothing was passed: an explicit 0
    # must mean "ignore no lines" rather than being replaced by the default.
    if ignore_lines is None:
        ignore_lines = self.ignore_lines
    self.name_tag = self.name_tag or os.path.basename(root)
    for fname in files:
        if print_load_details:
            print(fname)
        self.chains.append(
            WeightedSamples(fname, ignore_lines, min_weight_ratio=self.min_weight_ratio))
    if not self.chains:
        raise WeightedSampleError('loadChains - no chains found for ' + root)
    if self.paramNames is None:
        # No names supplied: use default names, one per column of the first chain
        self.paramNames = ParamNames(default=self.chains[0].n)
    self._weightsChanged()
    return len(self.chains) > 0
def loadChains(self, root, files_or_samples, weights=None, loglikes=None, ignore_lines=None):
    """
    Loads chains from files or from arrays of samples.

    :param root: Root name
    :param files_or_samples: list of file names or list of arrays of samples, or single array of samples
    :param weights: if loading from arrays of samples, corresponding list of arrays of weights
    :param loglikes: if loading from arrays of samples, corresponding list of arrays of -2 log(likelihood)
    :param ignore_lines: Number of lines at the start of each file/array to ignore; if None the
                         instance setting (self.ignore_lines) is used
    :return: True if loaded successfully
    :raises WeightedSampleError: if there is nothing to load
    :raises ValueError: if weights/loglikes are given when reading from files, or if the input
                        is neither file names nor an array (or list of arrays) of samples
    """
    self.chains = []
    self.samples = None
    self.weights = None
    self.loglikes = None
    if ignore_lines is None:
        ignore_lines = self.ignore_lines
    WSkwargs = {"ignore_rows": ignore_lines, "min_weight_ratio": self.min_weight_ratio}
    single_name = isinstance(files_or_samples, six.string_types)
    # Empty input must be reported as "no chains" rather than failing with an
    # IndexError on files_or_samples[0] below.
    if not single_name and len(files_or_samples) == 0:
        raise WeightedSampleError('loadChains - no chains found for ' + root)
    if single_name or isinstance(files_or_samples[0], six.string_types):
        # From files
        if weights is not None or loglikes is not None:
            raise ValueError('weights and loglikes not needed reading from file')
        if single_name:
            files_or_samples = [files_or_samples]
        self.name_tag = self.name_tag or os.path.basename(root)
        for fname in files_or_samples:
            if print_load_details:
                print(fname)
            self.chains.append(WeightedSamples(fname, **WSkwargs))
        nchains = len(self.chains)
        if not nchains:
            raise WeightedSampleError('loadChains - no chains found for ' + root)
    else:
        # From arrays
        dim = array_dimension(files_or_samples)
        if dim in [1, 2]:
            # A single set of samples: stored directly on this instance, not in self.chains
            self.setSamples(slice_or_none(files_or_samples, ignore_lines),
                            slice_or_none(weights, ignore_lines),
                            slice_or_none(loglikes, ignore_lines),
                            self.min_weight_ratio)
            if self.paramNames is None:
                self.paramNames = ParamNames(default=self.n)
            nchains = 1
        elif dim == 3:
            # A list of sample arrays: one WeightedSamples per entry
            for i, samples_i in enumerate(files_or_samples):
                self.chains.append(WeightedSamples(
                    samples=samples_i,
                    loglikes=None if loglikes is None else np.atleast_2d(loglikes)[i],
                    weights=None if weights is None else np.atleast_2d(weights)[i],
                    **WSkwargs))
            if self.paramNames is None:
                self.paramNames = ParamNames(default=self.chains[0].n)
            nchains = len(self.chains)
        else:
            raise ValueError('samples or files must be array of samples, or a list of arrays or files')
    self._weightsChanged()
    return nchains > 0
def loadChains(self, root, files, ignore_lines=None):
    """
    Loads chains from files.

    :param root: Root name
    :param files: list of file names
    :param ignore_lines: Number of lines at the start of each file to ignore; if None the
                         instance setting (self.ignore_lines) is used
    :return: True if chains were loaded
    :raises WeightedSampleError: if no chains were loaded
    """
    self.chains = []
    self.samples = None
    self.weights = None
    self.loglikes = None
    # Fall back to the instance setting only when nothing was passed: an explicit 0
    # must mean "ignore no lines" rather than being replaced by the default.
    if ignore_lines is None:
        ignore_lines = self.ignore_lines
    self.name_tag = self.name_tag or os.path.basename(root)
    for fname in files:
        if print_load_details:
            print(fname)
        self.chains.append(WeightedSamples(fname, ignore_lines))
    if not self.chains:
        raise WeightedSampleError('loadChains - no chains found for ' + root)
    if self.paramNames is None:
        # No names supplied: use default names, one per column of the first chain
        self.paramNames = ParamNames(default=self.chains[0].n)
    self._weightsChanged()
    return len(self.chains) > 0
def setParamNames(self, names=None):
    """
    Sets the names of the params.

    :param names: Either a :class:`~.paramnames.ParamNames` object, the name of a .paramnames file
                  to load, a list of name strings, otherwise use default names (param1, param2...).
    """
    # Always reset first, so paramNames is None if nothing below applies
    self.paramNames = None
    if names is None:
        # Default names can only be generated once samples are present
        if self.samples is not None:
            self.paramNames = ParamNames(default=self.n)
    elif isinstance(names, ParamNames):
        self.paramNames = names
    elif isinstance(names, six.string_types):
        # A string is passed straight to ParamNames as its first argument (a file name)
        self.paramNames = ParamNames(names)
    else:
        self.paramNames = ParamNames(names=names)
    if self.paramNames:
        self._getParamIndices()
class MixtureND(object):
    """
    Gaussian mixture model with optional boundary ranges. Includes functions for generating samples and projecting.
    """

    def __init__(self, means, covs, weights=None, lims=None, names=None, label='', labels=None):
        """
        :param means: list of means for each Gaussian in the mixture
        :param covs: list of covariances for the Gaussians in the mixture
        :param weights: optional weight for each component (defaults to equal weight)
        :param lims: optional list of hard limits for each parameter, [[x1min,x1max], [x2min,x2max]];
                     use None for no limit
        :param names: list of names (strings) for each parameter. If not set, set to "param1", "param2"...
        :param label: name for labelling this mixture
        :param labels: list of latex labels for each parameter. If not set, defaults to p_{1}, p_{2}...
        :raises ValueError: if the weights do not sum to a positive number
        """
        self.means = np.asarray(means)
        self.dim = self.means.shape[1]
        self.covs = [np.array(cov) for cov in covs]
        self.invcovs = [np.linalg.inv(cov) for cov in self.covs]
        if weights is None:
            weights = [1. / len(means)] * len(means)
        self.weights = np.array(weights, dtype=np.float64)
        weight_sum = np.sum(self.weights)
        if weight_sum <= 0:
            raise ValueError('Weight <= 0 in MixtureND')
        self.weights /= weight_sum
        # Normalization of each Gaussian component: (2 pi)^(dim/2) sqrt(det(cov))
        self.norms = (2 * np.pi) ** (0.5 * self.dim) * np.array(
            [np.sqrt(np.linalg.det(cov)) for cov in self.covs])
        self.lims = lims
        self.paramNames = ParamNames(names=names, default=self.dim, labels=labels)
        self.names = self.paramNames.list()
        self.label = label
        self.total_mean = np.atleast_1d(np.dot(self.weights, self.means))
        self.total_cov = np.zeros((self.dim, self.dim))
        # Total covariance = sum_i w_i (cov_i + (mean_i - total_mean)(mean_i - total_mean)^T).
        # The full total_mean vector is used for every component (zipping it with the component
        # lists would pair each component with one scalar coordinate and truncate the sum).
        for mean, cov, weight in zip(self.means, self.covs, self.weights):
            offset = mean - self.total_mean
            self.total_cov += weight * (cov + np.outer(offset, offset))

    def sim(self, size):
        """
        Generate an array of independent samples

        :param size: number of samples
        :return: 2D array of sample values
        """
        tot = 0
        res = []
        block = None
        while True:
            # Split this batch between components according to the mixture weights
            counts = np.random.multinomial(block or size, self.weights)
            for num, mean, cov in zip(counts, self.means, self.covs):
                if num > 0:
                    v = np.random.multivariate_normal(mean, cov, size=num)
                    if self.lims is not None:
                        # Rejection step: drop samples outside any hard limit
                        for i, (mn, mx) in enumerate(self.lims):
                            if mn is not None:
                                v = v[v[:, i] >= mn]
                            if mx is not None:
                                v = v[v[:, i] <= mx]
                    tot += v.shape[0]
                    res.append(v)
            if tot >= size:
                break
            if block is None:
                # Size for follow-up batches, scaled by the acceptance seen in the first pass
                block = min(max(size, 100000), int(1.1 * (size * (size - tot))) // max(tot, 1) + 1)
        samples = np.vstack(res)
        if len(res) > 1:
            # Batches are grouped by component, so shuffle before truncating
            samples = np.random.permutation(samples)
        if tot != size:
            samples = samples[:-(tot - size), :]
        return samples

    def MCSamples(self, size, names=None, logLikes=False, **kwargs):
        """
        Gets a set of independent samples from the mixture as a :class:`.mcsamples.MCSamples` object
        ready for plotting etc.

        :param size: number of samples
        :param names: set to override existing names
        :param logLikes: if True set the sample likelihood values from the pdf, if false, don't store log likelihoods
        :param kwargs: extra arguments passed on to the :class:`.mcsamples.MCSamples` constructor
        :return: a new :class:`.mcsamples.MCSamples` instance
        """
        samples = self.sim(size)
        if logLikes:
            loglikes = -np.log(self.pdf(samples))
        else:
            loglikes = None
        return MCSamples(samples=samples, loglikes=loglikes, paramNamesFile=copy.deepcopy(self.paramNames),
                         names=names, ranges=self.lims, **kwargs)

    def autoRanges(self, sigma_max=4, lims=None):
        """
        Get a (lower, upper) range for each parameter: a hard limit where one is set, otherwise
        a bound derived from the component means plus or minus sigma_max standard deviations.

        :param sigma_max: number of standard deviations to extend from each component mean
        :param lims: optional list of limits to use instead of self.lims
        :return: list of (lower, upper) tuples, one per parameter
        """
        res = []
        if lims is None:
            lims = self.lims
        if lims is None:
            lims = [(None, None) for _ in range(self.dim)]
        for i, (mn, mx) in enumerate(lims):
            covmin = None
            covmax = None
            if mn is None or mx is None:
                for mean, cov in zip(self.means, self.covs):
                    sigma = np.sqrt(cov[i, i])
                    xmin, xmax = mean[i] - sigma_max * sigma, mean[i] + sigma_max * sigma
                    # With a one-sided hard limit, extend at least sigma_max * sigma beyond it
                    if mn is not None:
                        xmax = max(xmax, mn + sigma_max * sigma)
                    if mx is not None:
                        xmin = min(xmin, mx - sigma_max * sigma)
                    covmin = min(xmin, covmin) if covmin is not None else xmin
                    covmax = max(xmax, covmax) if covmax is not None else xmax
            res.append((covmin if mn is None else mn, covmax if mx is None else mx))
        return res

    def pdf(self, x):
        """
        Calculate the PDF. Note this assumes x is within the boundaries (does not return zero outside)
        Result is also only normalized if no boundaries.

        :param x: array of parameter values to evaluate at
        :return: pdf at x
        """
        tot = None
        x = np.asarray(x)
        for i, (mean, icov, weight, norm) in enumerate(zip(self.means, self.invcovs, self.weights, self.norms)):
            dx = x - mean
            if len(x.shape) == 1:
                # Single point
                res = np.exp(-icov.dot(dx).dot(dx) / 2) / norm
            else:
                # One point per row
                res = np.exp(-np.einsum('ik,km,im->i', dx, icov, dx) / 2) / norm
            if not i:
                tot = res * weight
            else:
                tot += res * weight
        return tot

    def pdf_marged(self, index, x, no_limit_marge=False):
        """
        Calculate the 1D marginalized PDF. Only works if no other parameter limits are marginalized

        :param index: index or name of parameter
        :param x: value to evaluate PDF at
        :param no_limit_marge: if true don't raise an error if mixture has limits
        :return: marginalized 1D pdf at x
        """
        if isinstance(index, six.string_types):
            index = self.names.index(index)
        if not no_limit_marge:
            self.checkNoLimits([index])
        tot = None
        for i, (mean, cov, weight) in enumerate(zip(self.means, self.covs, self.weights)):
            dx = x - mean[index]
            var = cov[index, index]
            res = np.exp(-dx ** 2 / var / 2) / np.sqrt(2 * np.pi * var)
            if not i:
                tot = res * weight
            else:
                tot += res * weight
        return tot

    def density1D(self, index=0, num_points=1024, sigma_max=4, no_limit_marge=False):
        """
        Get 1D marginalized density. Only works if no hard limits in other parameters.

        :param index: parameter name or index
        :param num_points: number of grid points to evaluate PDF
        :param sigma_max: maximum number of standard deviations away from means to include in computed range
        :param no_limit_marge: if true don't raise error if limits on other parameters
        :return: :class:`~.densities.Density1D` instance
        """
        if isinstance(index, six.string_types):
            index = self.names.index(index)
        if not no_limit_marge:
            self.checkNoLimits([index])
        mn, mx = self.autoRanges(sigma_max)[index]
        x = np.linspace(mn, mx, num_points)
        # Forward the flag: otherwise pdf_marged repeats the limit check and raises anyway
        like = self.pdf_marged(index, x, no_limit_marge=no_limit_marge)
        return Density1D(x, like)

    def density2D(self, params=None, num_points=1024, xmin=None, xmax=None, ymin=None, ymax=None, sigma_max=5):
        """
        Get 2D marginalized density for a pair of parameters.

        :param params: list of two parameter names or indices to use. If already 2D, can be None.
        :param num_points: number of grid points for evaluation
        :param xmin: optional lower value for first parameter
        :param xmax: optional upper value for first parameter
        :param ymin: optional lower value for second parameter
        :param ymax: optional upper value for second parameter
        :param sigma_max: maximum number of standard deviations away from mean to include in calculated range
        :return: :class:`~.densities.Density2D` instance
        """
        if self.dim > 2 or params is not None or not isinstance(self, Mixture2D):
            mixture = self.marginalizedMixture(params=params)
        elif self.dim != 2:
            raise Exception('density2D requires at least two dimensions')
        else:
            mixture = self
        # NOTE(review): _density2D is not defined in this class; presumably provided by Mixture2D
        return mixture._density2D(num_points=num_points, xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax,
                                  sigma_max=sigma_max)

    def _params_to_indices(self, params):
        """
        Convert a list of parameter names, objects with a ``name`` attribute, or indices into indices.

        :param params: list of parameters; if None, all parameter names are used
        :return: list of indices (entries that are neither strings nor named objects are passed through)
        """
        indices = []
        if params is None:
            params = self.names
        for p in params:
            if isinstance(p, six.string_types):
                indices.append(self.names.index(p))
            elif hasattr(p, 'name'):
                indices.append(self.names.index(p.name))
            else:
                indices.append(p)
        return indices

    def marginalizedMixture(self, params, label=None, no_limit_marge=False):
        """
        Calculates a reduced mixture model by marginalization over unwanted parameters

        :param params: array of parameter names or indices to retain.
                       If none, will simply return a copy of this mixture.
        :param label: optional label for the marginalized mixture
        :param no_limit_marge: if true don't raise an error if mixture has limits.
        :return: a new marginalized :class:`MixtureND` instance
        """
        indices = self._params_to_indices(params)
        if not no_limit_marge:
            self.checkNoLimits(indices)
        indices = np.array(indices)
        if self.names is not None:
            names = [self.names[i] for i in indices]
        else:
            names = None
        if self.lims is not None:
            lims = [self.lims[i] for i in indices]
        else:
            lims = None
        if label is None:
            label = self.label
        # Marginalizing a Gaussian just selects the retained rows/columns
        covs = [cov[np.ix_(indices, indices)] for cov in self.covs]
        means = [mean[indices] for mean in self.means]
        if len(indices) == 2:
            tp = Mixture2D
        else:
            tp = MixtureND
        mixture = tp(means, covs, self.weights, lims=lims, names=names, label=label)
        mixture.paramNames.setLabelsAndDerivedFromParamNames(self.paramNames)
        return mixture

    def conditionalMixture(self, fixed_params, fixed_param_values, label=None):
        """
        Returns a reduced conditional mixture model for the distribution when certain parameters are fixed.

        :param fixed_params: list of names or numbers of parameters to fix
        :param fixed_param_values: list of values for the fixed parameters
        :param label: optional label for the new mixture
        :return: A new :class:`MixtureND` instance with cov_i = Projection(Cov_i^{-1})^{-1} and shifted
                 conditional means
        :raises ValueError: if every parameter is fixed
        """
        fixed_params = self._params_to_indices(fixed_params)
        self.checkNoLimits(fixed_params)
        keep_params = [i for i in range(self.dim) if i not in fixed_params]
        if not keep_params:
            raise ValueError('conditionalMixture must leave at least one non-fixed parameter')
        fixed_values = np.asarray(fixed_param_values)
        # Index grids for the fixed/kept sub-blocks; the same for every component
        ff = np.ix_(fixed_params, fixed_params)
        fk = np.ix_(fixed_params, keep_params)
        kf = np.ix_(keep_params, fixed_params)
        kk = np.ix_(keep_params, keep_params)
        new_means = []
        new_covs = []
        new_weights = []
        for mean, cov, invcov in zip(self.means, self.covs, self.invcovs):
            deltas = fixed_values - mean[fixed_params]
            new_cov = np.linalg.inv(invcov[kk])
            new_mean = mean[keep_params] - new_cov.dot(invcov[kf].dot(deltas))
            # NOTE(review): the original component weight is not folded into logw, and the
            # quadratic form uses the fixed block of the inverse covariance - confirm this is
            # the intended conditional weight.
            schur = cov[ff] - cov[fk].dot(np.linalg.inv(cov[kk]).dot(cov[kf]))
            logw = invcov[ff].dot(deltas).dot(deltas) + np.log(np.linalg.det(schur))
            new_weights.append(logw)
            new_means.append(new_mean)
            new_covs.append(new_cov)
        # Subtract the minimum before exponentiating so the largest weight is exactly 1
        new_weights = np.exp(-(np.asarray(new_weights) - min(new_weights)) / 2)
        if self.names is not None:
            names = [self.names[i] for i in keep_params]
        else:
            names = None
        mixture = MixtureND(new_means, new_covs, new_weights, names=names, label=label)
        mixture.paramNames.setLabelsAndDerivedFromParamNames(self.paramNames)
        return mixture

    def checkNoLimits(self, keep_params):
        """
        Raise an exception if any parameter not in keep_params has a hard limit.

        :param keep_params: indices of parameters that are allowed to have limits
        """
        if self.lims is None:
            return
        for i, lim in enumerate(self.lims):
            if i not in keep_params and (lim[0] is not None or lim[1] is not None):
                raise Exception(
                    'In general can only marginalize analytically if no hard boundary limits: ' + self.label)

    def getUpper(self, name):
        """
        Get the upper hard limit of a parameter.

        :param name: parameter name
        :return: the upper limit, or None if there are no limits or no upper limit is set
        """
        if self.lims is None:
            return None
        return self.lims[self.names.index(name)][1]

    def getLower(self, name):
        """
        Get the lower hard limit of a parameter.

        :param name: parameter name
        :return: the lower limit, or None if there are no limits or no lower limit is set
        """
        if self.lims is None:
            return None
        # Limits are stored as [min, max]: the lower limit is element 0
        return self.lims[self.names.index(name)][0]
class MixtureND(object):
    """
    Gaussian mixture model with optional boundary ranges. Includes functions for generating samples and projecting.
    """

    def __init__(self, means, covs, weights=None, lims=None, names=None, label='', labels=None):
        """
        :param means: list of means for each Gaussian in the mixture
        :param covs: list of covariances for the Gaussians in the mixture
        :param weights: optional weight for each component (defaults to equal weight)
        :param lims: optional list of hard limits for each parameter, [[x1min,x1max], [x2min,x2max]];
                     use None for no limit
        :param names: list of names (strings) for each parameter. If not set, set to "param1", "param2"...
        :param label: name for labelling this mixture
        :param labels: list of latex labels for each parameter. If not set, defaults to p_{1}, p_{2}...
        :raises ValueError: if the weights do not sum to a positive number
        """
        self.means = np.asarray(means)
        self.dim = self.means.shape[1]
        self.covs = [np.array(cov) for cov in covs]
        self.invcovs = [np.linalg.inv(cov) for cov in self.covs]
        if weights is None:
            weights = [1. / len(means)] * len(means)
        self.weights = np.array(weights, dtype=np.float64)
        weight_sum = np.sum(self.weights)
        if weight_sum <= 0:
            raise ValueError('Weight <= 0 in MixtureND')
        self.weights /= weight_sum
        # Normalization of each Gaussian component: (2 pi)^(dim/2) sqrt(det(cov))
        self.norms = (2 * np.pi) ** (0.5 * self.dim) * np.array(
            [np.sqrt(np.linalg.det(cov)) for cov in self.covs])
        self.lims = lims
        self.paramNames = ParamNames(names=names, default=self.dim, labels=labels)
        self.names = self.paramNames.list()
        self.label = label
        self.total_mean = np.atleast_1d(np.dot(self.weights, self.means))
        self.total_cov = np.zeros((self.dim, self.dim))
        # Total covariance = sum_i w_i (cov_i + (mean_i - total_mean)(mean_i - total_mean)^T).
        # The full total_mean vector is used for every component (zipping it with the component
        # lists would pair each component with one scalar coordinate and truncate the sum).
        for mean, cov, weight in zip(self.means, self.covs, self.weights):
            offset = mean - self.total_mean
            self.total_cov += weight * (cov + np.outer(offset, offset))

    def sim(self, size):
        """
        Generate an array of independent samples

        :param size: number of samples
        :return: 2D array of sample values
        """
        tot = 0
        res = []
        block = None
        while True:
            # Split this batch between components according to the mixture weights
            counts = np.random.multinomial(block or size, self.weights)
            for num, mean, cov in zip(counts, self.means, self.covs):
                if num > 0:
                    v = np.random.multivariate_normal(mean, cov, size=num)
                    if self.lims is not None:
                        # Rejection step: drop samples outside any hard limit
                        for i, (mn, mx) in enumerate(self.lims):
                            if mn is not None:
                                v = v[v[:, i] >= mn]
                            if mx is not None:
                                v = v[v[:, i] <= mx]
                    tot += v.shape[0]
                    res.append(v)
            if tot >= size:
                break
            if block is None:
                # Size for follow-up batches, scaled by the acceptance seen in the first pass
                block = min(max(size, 100000), int(1.1 * (size * (size - tot))) // max(tot, 1) + 1)
        samples = np.vstack(res)
        if len(res) > 1:
            # Batches are grouped by component, so shuffle before truncating
            samples = np.random.permutation(samples)
        if tot != size:
            samples = samples[:-(tot - size), :]
        return samples

    def MCSamples(self, size, names=None, logLikes=False, **kwargs):
        """
        Gets a set of independent samples from the mixture as a :class:`.mcsamples.MCSamples` object
        ready for plotting etc.

        :param size: number of samples
        :param names: set to override existing names
        :param logLikes: if True set the sample likelihood values from the pdf, if false, don't store log likelihoods
        :param kwargs: extra arguments passed on to the :class:`.mcsamples.MCSamples` constructor
        :return: a new :class:`.mcsamples.MCSamples` instance
        """
        samples = self.sim(size)
        if logLikes:
            loglikes = -np.log(self.pdf(samples))
        else:
            loglikes = None
        return MCSamples(samples=samples, loglikes=loglikes, paramNamesFile=self.paramNames,
                         names=names, ranges=self.lims, **kwargs)

    def autoRanges(self, sigma_max=4, lims=None):
        """
        Get a (lower, upper) range for each parameter: a hard limit where one is set, otherwise
        a bound derived from the component means plus or minus sigma_max standard deviations.

        :param sigma_max: number of standard deviations to extend from each component mean
        :param lims: optional list of limits to use instead of self.lims
        :return: list of (lower, upper) tuples, one per parameter
        """
        res = []
        if lims is None:
            lims = self.lims
        if lims is None:
            lims = [(None, None) for _ in range(self.dim)]
        for i, (mn, mx) in enumerate(lims):
            covmin = None
            covmax = None
            if mn is None or mx is None:
                for mean, cov in zip(self.means, self.covs):
                    sigma = np.sqrt(cov[i, i])
                    xmin, xmax = mean[i] - sigma_max * sigma, mean[i] + sigma_max * sigma
                    # With a one-sided hard limit, extend at least sigma_max * sigma beyond it
                    if mn is not None:
                        xmax = max(xmax, mn + sigma_max * sigma)
                    if mx is not None:
                        xmin = min(xmin, mx - sigma_max * sigma)
                    covmin = min(xmin, covmin) if covmin is not None else xmin
                    covmax = max(xmax, covmax) if covmax is not None else xmax
            res.append((covmin if mn is None else mn, covmax if mx is None else mx))
        return res

    def pdf(self, x):
        """
        Calculate the PDF. Note this assumes x is within the boundaries (does not return zero outside)
        Result is also only normalized if no boundaries.

        :param x: array of parameter values to evaluate at
        :return: pdf at x
        """
        tot = None
        x = np.asarray(x)
        for i, (mean, icov, weight, norm) in enumerate(zip(self.means, self.invcovs, self.weights, self.norms)):
            dx = x - mean
            if len(x.shape) == 1:
                # Single point
                res = np.exp(-icov.dot(dx).dot(dx) / 2) / norm
            else:
                # One point per row
                res = np.exp(-np.einsum('ik,km,im->i', dx, icov, dx) / 2) / norm
            if not i:
                tot = res * weight
            else:
                tot += res * weight
        return tot

    def pdf_marged(self, index, x, no_limit_marge=False):
        """
        Calculate the 1D marginalized PDF. Only works if no other parameter limits are marginalized

        :param index: index or name of parameter
        :param x: value to evaluate PDF at
        :param no_limit_marge: if true don't raise an error if mixture has limits
        :return: marginalized 1D pdf at x
        """
        if isinstance(index, six.string_types):
            index = self.names.index(index)
        if not no_limit_marge:
            self.checkNoLimits([index])
        tot = None
        for i, (mean, cov, weight) in enumerate(zip(self.means, self.covs, self.weights)):
            dx = x - mean[index]
            var = cov[index, index]
            res = np.exp(-dx ** 2 / var / 2) / np.sqrt(2 * np.pi * var)
            if not i:
                tot = res * weight
            else:
                tot += res * weight
        return tot

    def density1D(self, index=0, num_points=1024, sigma_max=4, no_limit_marge=False):
        """
        Get 1D marginalized density. Only works if no hard limits in other parameters.

        :param index: parameter name or index
        :param num_points: number of grid points to evaluate PDF
        :param sigma_max: maximum number of standard deviations away from means to include in computed range
        :param no_limit_marge: if true don't raise error if limits on other parameters
        :return: :class:`~.densities.Density1D` instance
        """
        if isinstance(index, six.string_types):
            index = self.names.index(index)
        if not no_limit_marge:
            self.checkNoLimits([index])
        mn, mx = self.autoRanges(sigma_max)[index]
        x = np.linspace(mn, mx, num_points)
        # Forward the flag: otherwise pdf_marged repeats the limit check and raises anyway
        like = self.pdf_marged(index, x, no_limit_marge=no_limit_marge)
        return Density1D(x, like)

    def density2D(self, params=None, num_points=1024, xmin=None, xmax=None, ymin=None, ymax=None, sigma_max=5):
        """
        Get 2D marginalized density for a pair of parameters.

        :param params: list of two parameter names or indices to use. If already 2D, can be None.
        :param num_points: number of grid points for evaluation
        :param xmin: optional lower value for first parameter
        :param xmax: optional upper value for first parameter
        :param ymin: optional lower value for second parameter
        :param ymax: optional upper value for second parameter
        :param sigma_max: maximum number of standard deviations away from mean to include in calculated range
        :return: :class:`~.densities.Density2D` instance
        """
        if self.dim > 2 or params is not None or not isinstance(self, Mixture2D):
            mixture = self.marginalizedMixture(params=params)
        elif self.dim != 2:
            raise Exception('density2D requires at least two dimensions')
        else:
            mixture = self
        # NOTE(review): _density2D is not defined in this class; presumably provided by Mixture2D
        return mixture._density2D(num_points=num_points, xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax,
                                  sigma_max=sigma_max)

    def _params_to_indices(self, params):
        """
        Convert a list of parameter names, objects with a ``name`` attribute, or indices into indices.

        :param params: list of parameters; if None, all parameter names are used
        :return: list of indices (entries that are neither strings nor named objects are passed through)
        """
        indices = []
        if params is None:
            params = self.names
        for p in params:
            if isinstance(p, six.string_types):
                indices.append(self.names.index(p))
            elif hasattr(p, 'name'):
                indices.append(self.names.index(p.name))
            else:
                indices.append(p)
        return indices

    def marginalizedMixture(self, params, label=None, no_limit_marge=False):
        """
        Calculates a reduced mixture model by marginalization over unwanted parameters

        :param params: array of parameter names or indices to retain.
                       If none, will simply return a copy of this mixture.
        :param label: optional label for the marginalized mixture
        :param no_limit_marge: if true don't raise an error if mixture has limits.
        :return: a new marginalized :class:`MixtureND` instance
        """
        indices = self._params_to_indices(params)
        if not no_limit_marge:
            self.checkNoLimits(indices)
        indices = np.array(indices)
        if self.names is not None:
            names = [self.names[i] for i in indices]
        else:
            names = None
        if self.lims is not None:
            lims = [self.lims[i] for i in indices]
        else:
            lims = None
        if label is None:
            label = self.label
        # Marginalizing a Gaussian just selects the retained rows/columns
        covs = [cov[np.ix_(indices, indices)] for cov in self.covs]
        means = [mean[indices] for mean in self.means]
        if len(indices) == 2:
            tp = Mixture2D
        else:
            tp = MixtureND
        mixture = tp(means, covs, self.weights, lims=lims, names=names, label=label)
        mixture.paramNames.setLabelsAndDerivedFromParamNames(self.paramNames)
        return mixture

    def conditionalMixture(self, fixed_params, fixed_param_values, label=None):
        """
        Returns a reduced conditional mixture model for the distribution when certain parameters are fixed.

        :param fixed_params: list of names or numbers of parameters to fix
        :param fixed_param_values: list of values for the fixed parameters
        :param label: optional label for the new mixture
        :return: A new :class:`MixtureND` instance with cov_i = Projection(Cov_i^{-1})^{-1} and shifted
                 conditional means
        :raises ValueError: if every parameter is fixed
        """
        fixed_params = self._params_to_indices(fixed_params)
        self.checkNoLimits(fixed_params)
        keep_params = [i for i in range(self.dim) if i not in fixed_params]
        if not keep_params:
            raise ValueError('conditionalMixture must leave at least one non-fixed parameter')
        fixed_values = np.asarray(fixed_param_values)
        # Index grids for the fixed/kept sub-blocks; the same for every component
        ff = np.ix_(fixed_params, fixed_params)
        fk = np.ix_(fixed_params, keep_params)
        kf = np.ix_(keep_params, fixed_params)
        kk = np.ix_(keep_params, keep_params)
        new_means = []
        new_covs = []
        new_weights = []
        for mean, cov, invcov in zip(self.means, self.covs, self.invcovs):
            deltas = fixed_values - mean[fixed_params]
            new_cov = np.linalg.inv(invcov[kk])
            new_mean = mean[keep_params] - new_cov.dot(invcov[kf].dot(deltas))
            # NOTE(review): the original component weight is not folded into logw, and the
            # quadratic form uses the fixed block of the inverse covariance - confirm this is
            # the intended conditional weight.
            schur = cov[ff] - cov[fk].dot(np.linalg.inv(cov[kk]).dot(cov[kf]))
            logw = invcov[ff].dot(deltas).dot(deltas) + np.log(np.linalg.det(schur))
            new_weights.append(logw)
            new_means.append(new_mean)
            new_covs.append(new_cov)
        # Subtract the minimum before exponentiating so the largest weight is exactly 1
        new_weights = np.exp(-(np.asarray(new_weights) - min(new_weights)) / 2)
        if self.names is not None:
            names = [self.names[i] for i in keep_params]
        else:
            names = None
        mixture = MixtureND(new_means, new_covs, new_weights, names=names, label=label)
        mixture.paramNames.setLabelsAndDerivedFromParamNames(self.paramNames)
        return mixture

    def checkNoLimits(self, keep_params):
        """
        Raise an exception if any parameter not in keep_params has a hard limit.

        :param keep_params: indices of parameters that are allowed to have limits
        """
        if self.lims is None:
            return
        for i, lim in enumerate(self.lims):
            if i not in keep_params and (lim[0] is not None or lim[1] is not None):
                raise Exception(
                    'In general can only marginalize analytically if no hard boundary limits: ' + self.label)

    def getUpper(self, name):
        """
        Get the upper hard limit of a parameter.

        :param name: parameter name
        :return: the upper limit, or None if there are no limits or no upper limit is set
        """
        if self.lims is None:
            return None
        return self.lims[self.names.index(name)][1]

    def getLower(self, name):
        """
        Get the lower hard limit of a parameter.

        :param name: parameter name
        :return: the lower limit, or None if there are no limits or no lower limit is set
        """
        if self.lims is None:
            return None
        # Limits are stored as [min, max]: the lower limit is element 0
        return self.lims[self.names.index(name)][0]
class Chains(WeightedSamples):
    """
    Holds one or more sets of weighted samples, for example a set of MCMC chains.
    Inherits from :class:`~.chains.WeightedSamples`, also adding parameter names and labels

    :ivar paramNames: a :class:`~.paramnames.ParamNames` instance holding the parameter names and labels
    """

    def __init__(self, root=None, jobItem=None, paramNamesFile=None, names=None, labels=None, **kwargs):
        """
        :param root: optional root name for files
        :param jobItem: optional jobItem for parameter grid item
        :param paramNamesFile: optional filename of a .paramnames files that holds parameter names
        :param names: optional list of names for the parameters
        :param labels: optional list of latex labels for the parameters
        :param kwargs: extra options for :class:`~.chains.WeightedSamples`'s constructor
        """
        # Set before the base constructor runs (presumably it may trigger code reading self.chains).
        self.chains = None
        WeightedSamples.__init__(self, **kwargs)
        self.jobItem = jobItem
        self.ignore_lines = float(kwargs.get('ignore_rows', 0))
        self.root = root
        if not paramNamesFile and root:
            # Look for <root>.paramnames first, then <root>__full.yaml (or <root>full.yaml for a directory root).
            mid = ('' if root.endswith("/") else "__")
            if os.path.exists(root + '.paramnames'):
                paramNamesFile = root + '.paramnames'
            elif os.path.exists(root + mid + 'full.yaml'):
                paramNamesFile = root + mid + 'full.yaml'
        self.setParamNames(paramNamesFile or names)
        if labels is not None:
            self.paramNames.setLabels(labels)

    def setParamNames(self, names=None):
        """
        Sets the names of the params.

        :param names: Either a :class:`~.paramnames.ParamNames` object, the name of a .paramnames file to load,
                      a list of name strings, otherwise use default names (param1, param2...).
        """
        self.paramNames = None
        if isinstance(names, ParamNames):
            self.paramNames = names
        elif isinstance(names, six.string_types):
            self.paramNames = ParamNames(names)
        elif names is not None:
            self.paramNames = ParamNames(names=names)
        elif self.samples is not None:
            self.paramNames = ParamNames(default=self.n)
        if self.paramNames:
            self._getParamIndices()
        self.needs_update = True

    def filter(self, where):
        """
        Filter the stored samples to keep only samples matching filter

        :param where: list of sample indices to keep, or boolean array filter
                      (e.g. x>5 to keep only samples where x>5)
        :raises ValueError: if the chains are still stored separately
        """
        if self.chains is None:
            if hasattr(self, 'chain_offsets'):
                # must update chain_offsets to be able to correctly split back into separate filtered chains if needed
                # NOTE(review): counting non-zero entries per slice matches a boolean mask; for a list of
                # sample indices the resulting offsets look unreliable - confirm.
                lens = [0]
                for off1, off2 in zip(self.chain_offsets[:-1], self.chain_offsets[1:]):
                    lens.append(np.count_nonzero(where[off1:off2]))
                self.chain_offsets = np.cumsum(np.array(lens))
            super(Chains, self).filter(where)
        else:
            raise ValueError('chains are separated, makeSingle first or call filter on individual chains')

    def getParamNames(self):
        """
        Get :class:`~.paramnames.ParamNames` object with names for the parameters

        :return: :class:`~.paramnames.ParamNames` object giving parameter names and labels
        """
        return self.paramNames

    def _getParamIndices(self):
        """
        Gets the indices of the params, and stores them in self.index.

        :return: A dict mapping the param name to the parameter index.
        :raises WeightedSampleError: if samples are loaded and their number of parameters differs
                                     from the number of names
        """
        if self.samples is not None and len(self.paramNames.names) != self.n:
            raise WeightedSampleError("paramNames size does not match number of parameters in samples")
        self.index = {name.name: i for i, name in enumerate(self.paramNames.names)}
        return self.index

    def setParams(self, obj):
        """
        Adds array variables obj.name1, obj.name2 etc, where
        obj.name1 is the vector of samples with name 'name1'

        if a parameter name is of the form aa.bb.cc, it makes subobjects so you can reference obj.aa.bb.cc

        :param obj: The object instance to add the parameter vectors variables
        :return: The obj after alterations.
        """
        for i, name in enumerate(self.paramNames.names):
            path = name.name.split('.')
            ob = obj
            # Walk (creating where needed) the chain of sub-objects for dotted names.
            for p in path[:-1]:
                if not hasattr(ob, p):
                    setattr(ob, p, ParSamples())
                ob = getattr(ob, p)
            setattr(ob, path[-1], self.samples[:, i])
        return obj

    def getParams(self):
        """
        Creates a :class:`~.chains.ParSamples` object, with variables giving vectors for all the parameters,
        for example samples.getParams().name1 would be the vector of samples with name 'name1'

        :return: A :class:`~.chains.ParSamples` object containing all the parameter vectors, with attributes
                 given by the parameter names
        """
        pars = ParSamples()
        self.setParams(pars)
        return pars

    def getParamSampleDict(self, ix):
        """
        Returns a dictionary of parameter values for sample number ix

        :param ix: index of the sample
        :return: ordered dictionary mapping each parameter name to its value in sample ix, plus
                 'weight' and 'loglike' entries holding that sample's weight and loglike
        """
        from collections import OrderedDict
        res = OrderedDict()
        for i, name in enumerate(self.paramNames.names):
            res[name.name] = self.samples[ix, i]
        # Index by the sample number, not by the parameter loop index.
        res['weight'] = self.weights[ix]
        res['loglike'] = self.loglikes[ix]
        return res

    def _makeParamvec(self, par):
        """
        Gets the vector of samples for a parameter, refreshing the base statistics first if needed.

        :param par: a ParamInfo instance or parameter name string; anything else is passed on to
                    :class:`~.chains.WeightedSamples`'s implementation
        :return: the vector of sample values for the parameter
        """
        if self.needs_update:
            self.updateBaseStatistics()
        if isinstance(par, ParamInfo):
            par = par.name
        if isinstance(par, six.string_types):
            return self.samples[:, self.index[par]]
        return WeightedSamples._makeParamvec(self, par)

    def updateChainBaseStatistics(self):
        # old name, use updateBaseStatistics
        return self.updateBaseStatistics()

    def updateBaseStatistics(self):
        """
        Updates basic computed statistics for this chain, e.g. after any changes to the samples or weights

        :return: self after updating statistics.
        """
        self.getVars()
        self.mean_mult = self.norm / self.numrows
        self.max_mult = np.max(self.weights)
        self._getParamIndices()
        self.needs_update = False
        return self

    def addDerived(self, paramVec, name, **kwargs):
        """
        Adds a new parameter

        :param paramVec: The vector of parameter values to add.
        :param name: The name for the new parameter
        :param kwargs: arguments for paramnames' :func:`~.paramnames.ParamList.addDerived`
        :return: The added parameter's :class:`~.paramnames.ParamInfo` object
        :raises ValueError: if a parameter with this name already exists
        """
        if self.paramNames.parWithName(name):
            raise ValueError('Parameter with name %s already exists' % name)
        self.changeSamples(np.c_[self.samples, paramVec])
        return self.paramNames.addDerived(name, **kwargs)

    def loadChains(self, root, files, ignore_lines=None):
        """
        Loads chains from files.

        :param root: Root name
        :param files: list of file names
        :param ignore_lines: Amount of lines at the start of the file to ignore,
                             None to use this instance's ignore_lines setting
        :return: True if loaded successfully
        :raises WeightedSampleError: if no chains were loaded
        """
        self.chains = []
        self.samples = None
        self.weights = None
        self.loglikes = None
        self.name_tag = self.name_tag or os.path.basename(root)
        # Only fall back to the instance setting when nothing was given, so an explicit 0 is respected.
        if ignore_lines is None:
            ignore_lines = self.ignore_lines
        for fname in files:
            if print_load_details:
                print(fname)
            self.chains.append(
                WeightedSamples(fname, ignore_lines, min_weight_ratio=self.min_weight_ratio))
        if len(self.chains) == 0:
            raise WeightedSampleError('loadChains - no chains found for ' + root)
        if self.paramNames is None:
            self.paramNames = ParamNames(default=self.chains[0].n)
        self._weightsChanged()
        return len(self.chains) > 0

    def getGelmanRubinEigenvalues(self, nparam=None, chainlist=None):
        """
        Assess convergence using var(mean)/mean(var) in the orthogonalized parameters
        c.f. Brooks and Gelman 1997.

        :param nparam: The number of parameters (starting at first), by default uses all of them
        :param chainlist: list of :class:`WeightedSamples`, the samples to use.
                          Defaults to all the separate chains in this instance.
        :return: array of var(mean)/mean(var) for orthogonalized parameters, or None if the
                 mean covariance has a non-positive eigenvalue
        """
        if chainlist is None:
            chainlist = self.getSeparateChains()
        nparam = nparam or self.paramNames.numNonDerived()
        meanscov = np.zeros((nparam, nparam))
        means = self.getMeans()[:nparam]
        meancov = np.zeros(meanscov.shape)
        for chain in chainlist:
            diff = chain.getMeans()[:nparam] - means
            meanscov += np.outer(diff, diff)
            meancov += chain.getCov(nparam)
        meanscov /= (len(chainlist) - 1)
        meancov /= len(chainlist)
        w, U = np.linalg.eigh(meancov)
        if np.min(w) > 0:
            # Rescale the eigenvectors so that the mean covariance becomes the identity.
            U /= np.sqrt(w)
            D = np.linalg.eigvalsh(np.dot(U.T, meanscov).dot(U))
            return D
        else:
            return None

    def getGelmanRubin(self, nparam=None, chainlist=None):
        """
        Assess the convergence using the maximum var(mean)/mean(var) of orthogonalized parameters
        c.f. Brooks and Gelman 1997.

        :param nparam: The number of parameters, by default uses all
        :param chainlist: list of :class:`WeightedSamples`, the samples to use. Defaults to all the
                          separate chains in this instance.
        :return: The worst var(mean)/mean(var) for orthogonalized parameters. Should be <<1 for good convergence.
        """
        return np.max(self.getGelmanRubinEigenvalues(nparam, chainlist))

    def makeSingle(self):
        """
        Combines separate chains into one samples array, so self.samples has all the samples
        and this instance can then be used as a general :class:`~.chains.WeightedSamples` instance.

        :return: self
        """
        self.chain_offsets = np.cumsum(np.array([0] + [chain.samples.shape[0] for chain in self.chains]))
        # NumPy's stacking functions need a real sequence; generators are rejected by current versions.
        weights = np.hstack([chain.weights for chain in self.chains])
        loglikes = np.hstack([chain.loglikes for chain in self.chains])
        self.setSamples(np.vstack([chain.samples for chain in self.chains]), weights, loglikes, min_weight_ratio=-1)
        self.chains = None
        self.needs_update = True
        return self

    def getSeparateChains(self):
        """
        Gets a list of samples for separate chains.
        If the chains have already been combined, uses the stored sample offsets to reconstruct the array
        (generally no array copying)

        :return: The list of :class:`~.chains.WeightedSamples` for each chain.
        """
        if self.chains is not None:
            return self.chains
        chainlist = []
        for off1, off2 in zip(self.chain_offsets[:-1], self.chain_offsets[1:]):
            chainlist.append(WeightedSamples(samples=self.samples[off1:off2], weights=self.weights[off1:off2],
                                             loglikes=self.loglikes[off1:off2]))
        return chainlist

    def removeBurnFraction(self, ignore_frac):
        """
        Remove a fraction of the samples as burn in

        :param ignore_frac: fraction of sample points to remove from the start of the samples,
                            or each chain if not combined
        """
        if self.samples is not None:
            self.removeBurn(ignore_frac)
            self.chains = None
            self.needs_update = True
        else:
            for chain in self.chains:
                chain.removeBurn(ignore_frac)

    def deleteFixedParams(self):
        """
        Delete parameters that are fixed (the same value in all samples)
        """
        if self.samples is not None:
            fixed = WeightedSamples.deleteFixedParams(self)
            self.chains = None
        else:
            # Fixed columns are identified from the first chain only, then removed from every chain.
            fixed = []
            chain = self.chains[0]
            for i in range(chain.n):
                if np.all(chain.samples[:, i] == chain.samples[0, i]):
                    fixed.append(i)
            for chain in self.chains:
                chain.changeSamples(np.delete(chain.samples, fixed, 1))
        self.paramNames.deleteIndices(fixed)
        self._getParamIndices()

    def saveAsText(self, root, chain_index=None, make_dirs=False):
        """
        Saves the samples as text files, including parameter names as .paramnames file.

        :param root: The root name to use
        :param chain_index: Optional index to be used for the filename, zero based, e.g. for saving one
                            of multiple chains
        :param make_dirs: True if this should (recursively) create the directory if it doesn't exist
        """
        super(Chains, self).saveAsText(root, chain_index, make_dirs)
        # The .paramnames file is written when chain_index is None or 0.
        if not chain_index:
            self.paramNames.saveAsText(root + '.paramnames')

    def savePickle(self, filename):
        """
        Save the current object to a file in pickle format

        :param filename: The file to write to
        """
        with open(filename, 'wb') as output:
            pickle.dump(self, output, pickle.HIGHEST_PROTOCOL)
class Chains(WeightedSamples):
    """
    Holds one or more sets of weighted samples, for example a set of MCMC chains.
    Inherits from :class:`~.chains.WeightedSamples`, also adding parameter names and labels

    :ivar paramNames: a :class:`~.paramnames.ParamNames` instance holding the parameter names and labels
    """

    def __init__(self, root=None, jobItem=None, paramNamesFile=None, names=None, labels=None, **kwargs):
        """
        :param root: optional root name for files
        :param jobItem: optional jobItem for parameter grid item
        :param paramNamesFile: optional filename of a .paramnames files that holds parameter names
        :param names: optional list of names for the parameters
        :param labels: optional list of latex labels for the parameters
        :param kwargs: extra options for :class:`~.chains.WeightedSamples`'s constructor
        """
        WeightedSamples.__init__(self, **kwargs)
        self.jobItem = jobItem
        self.precision = '%.8e'
        self.ignore_lines = float(kwargs.get('ignore_rows', 0))
        self.root = root
        # Pick up <root>.paramnames automatically when no parameter names file was given.
        if not paramNamesFile and root and os.path.exists(root + '.paramnames'):
            paramNamesFile = root + '.paramnames'
        self.needs_update = True
        self.chains = None
        self.setParamNames(paramNamesFile or names)
        if labels is not None:
            self.paramNames.setLabels(labels)

    def setParamNames(self, names=None):
        """
        Sets the names of the params.

        :param names: Either a :class:`~.paramnames.ParamNames` object, the name of a .paramnames file to load,
                      a list of name strings, otherwise use default names (param1, param2...).
        """
        self.paramNames = None
        if isinstance(names, ParamNames):
            self.paramNames = names
        elif isinstance(names, six.string_types):
            self.paramNames = ParamNames(names)
        elif names is not None:
            self.paramNames = ParamNames(names=names)
        elif self.samples is not None:
            self.paramNames = ParamNames(default=self.n)
        if self.paramNames:
            self._getParamIndices()

    def getParamNames(self):
        """
        Get :class:`~.paramnames.ParamNames` object with names for the parameters

        :return: :class:`~.paramnames.ParamNames` object giving parameter names and labels
        """
        return self.paramNames

    def _getParamIndices(self):
        """
        Gets the indices of the params, and stores them in self.index.

        :return: A dict mapping the param name to the parameter index.
        :raises WeightedSampleError: if samples are loaded and their number of parameters differs
                                     from the number of names
        """
        if self.samples is not None and len(self.paramNames.names) != self.n:
            raise WeightedSampleError("paramNames size does not match number of parameters in samples")
        self.index = {name.name: i for i, name in enumerate(self.paramNames.names)}
        return self.index

    def setParams(self, obj):
        """
        Adds array variables obj.name1, obj.name2 etc, where
        obj.name1 is the vector of samples with name 'name1'

        if a parameter name is of the form aa.bb.cc, it makes subobjects so you can reference obj.aa.bb.cc

        :param obj: The object instance to add the parameter vectors variables
        :return: The obj after alterations.
        """
        for i, name in enumerate(self.paramNames.names):
            path = name.name.split('.')
            ob = obj
            # Walk (creating where needed) the chain of sub-objects for dotted names.
            for p in path[:-1]:
                if not hasattr(ob, p):
                    setattr(ob, p, ParSamples())
                ob = getattr(ob, p)
            setattr(ob, path[-1], self.samples[:, i])
        return obj

    def getParams(self):
        """
        Creates a :class:`~.chains.ParSamples` object, with variables giving vectors for all the parameters,
        for example samples.getParams().name1 would be the vector of samples with name 'name1'

        :return: A :class:`~.chains.ParSamples` object containing all the parameter vectors, with attributes
                 given by the parameter names
        """
        pars = ParSamples()
        self.setParams(pars)
        return pars

    def _makeParamvec(self, par):
        """
        Gets the vector of samples for a parameter, refreshing the base statistics first if needed.

        :param par: a ParamInfo instance or parameter name string; anything else is passed on to
                    :class:`~.chains.WeightedSamples`'s implementation
        :return: the vector of sample values for the parameter
        """
        if self.needs_update:
            self.updateBaseStatistics()
        if isinstance(par, ParamInfo):
            par = par.name
        if isinstance(par, six.string_types):
            return self.samples[:, self.index[par]]
        return WeightedSamples._makeParamvec(self, par)

    def updateChainBaseStatistics(self):
        # old name, use updateBaseStatistics
        return self.updateBaseStatistics()

    def updateBaseStatistics(self):
        """
        Updates basic computed statistics for this chain, e.g. after any changes to the samples or weights

        :return: self after updating statistics.
        """
        self.getVars()
        self.mean_mult = self.norm / self.numrows
        self.max_mult = np.max(self.weights)
        self._getParamIndices()
        self.needs_update = False
        return self

    def addDerived(self, paramVec, name, **kwargs):
        """
        Adds a new parameter

        :param paramVec: The vector of parameter values to add.
        :param name: The name for the new parameter
        :param kwargs: arguments for paramnames' :func:`~.paramnames.ParamList.addDerived`
        :return: The added parameter's :class:`~.paramnames.ParamInfo` object
        :raises ValueError: if a parameter with this name already exists
        """
        if self.paramNames.parWithName(name):
            raise ValueError('Parameter with name %s already exists' % name)
        self.changeSamples(np.c_[self.samples, paramVec])
        return self.paramNames.addDerived(name, **kwargs)

    def loadChains(self, root, files, ignore_lines=None):
        """
        Loads chains from files.

        :param root: Root name
        :param files: list of file names
        :param ignore_lines: Amount of lines at the start of the file to ignore,
                             None to use this instance's ignore_lines setting
        :return: True if loaded successfully
        :raises WeightedSampleError: if no chains were loaded
        """
        self.chains = []
        self.samples = None
        self.weights = None
        self.loglikes = None
        self.name_tag = self.name_tag or os.path.basename(root)
        # Only fall back to the instance setting when nothing was given, so an explicit 0 is respected.
        if ignore_lines is None:
            ignore_lines = self.ignore_lines
        for fname in files:
            if print_load_details:
                print(fname)
            self.chains.append(WeightedSamples(fname, ignore_lines))
        if len(self.chains) == 0:
            raise WeightedSampleError('loadChains - no chains found for ' + root)
        if self.paramNames is None:
            self.paramNames = ParamNames(default=self.chains[0].n)
        self._weightsChanged()
        return len(self.chains) > 0

    def getGelmanRubinEigenvalues(self, nparam=None, chainlist=None):
        """
        Assess convergence using var(mean)/mean(var) in the orthogonalized parameters
        c.f. Brooks and Gelman 1997.

        :param nparam: The number of parameters (starting at first), by default uses all of them
        :param chainlist: list of :class:`WeightedSamples`, the samples to use.
                          Defaults to all the separate chains in this instance.
        :return: array of var(mean)/mean(var) for orthogonalized parameters, or None if the
                 mean covariance has a non-positive eigenvalue
        """
        if chainlist is None:
            chainlist = self.getSeparateChains()
        nparam = nparam or self.paramNames.numNonDerived()
        meanscov = np.zeros((nparam, nparam))
        means = self.getMeans()[:nparam]
        meancov = np.zeros(meanscov.shape)
        for chain in chainlist:
            diff = chain.getMeans()[:nparam] - means
            meanscov += np.outer(diff, diff)
            meancov += chain.getCov(nparam)
        meanscov /= (len(chainlist) - 1)
        meancov /= len(chainlist)
        w, U = np.linalg.eigh(meancov)
        if np.min(w) > 0:
            # Rescale the eigenvectors so that the mean covariance becomes the identity.
            U /= np.sqrt(w)
            D = np.linalg.eigvalsh(np.dot(U.T, meanscov).dot(U))
            return D
        else:
            return None

    def getGelmanRubin(self, nparam=None, chainlist=None):
        """
        Assess the convergence using the maximum var(mean)/mean(var) of orthogonalized parameters
        c.f. Brooks and Gelman 1997.

        :param nparam: The number of parameters, by default uses all
        :param chainlist: list of :class:`WeightedSamples`, the samples to use. Defaults to all the
                          separate chains in this instance.
        :return: The worst var(mean)/mean(var) for orthogonalized parameters. Should be <<1 for good convergence.
        """
        return np.max(self.getGelmanRubinEigenvalues(nparam, chainlist))

    def makeSingle(self):
        """
        Combines separate chains into one samples array, so self.samples has all the samples
        and this instance can then be used as a general :class:`~.chains.WeightedSamples` instance.

        :return: self
        """
        self.chain_offsets = np.cumsum(np.array([0] + [chain.samples.shape[0] for chain in self.chains]))
        # NumPy's stacking functions need a real sequence; generators are rejected by current versions.
        weights = np.hstack([chain.weights for chain in self.chains])
        loglikes = np.hstack([chain.loglikes for chain in self.chains])
        self.setSamples(np.vstack([chain.samples for chain in self.chains]), weights, loglikes)
        self.chains = None
        self.needs_update = True
        return self

    def getSeparateChains(self):
        """
        Gets a list of samples for separate chains.
        If the chains have already been combined, uses the stored sample offsets to reconstruct the array
        (generally no array copying)

        :return: The list of :class:`~.chains.WeightedSamples` for each chain.
        """
        if self.chains is not None:
            return self.chains
        chainlist = []
        for off1, off2 in zip(self.chain_offsets[:-1], self.chain_offsets[1:]):
            chainlist.append(WeightedSamples(samples=self.samples[off1:off2], weights=self.weights[off1:off2],
                                             loglikes=self.loglikes[off1:off2]))
        return chainlist

    def removeBurnFraction(self, ignore_frac):
        """
        Remove a fraction of the samples as burn in

        :param ignore_frac: fraction of sample points to remove from the start of the samples,
                            or each chain if not combined
        """
        if self.samples is not None:
            self.removeBurn(ignore_frac)
            self.chains = None
            self.needs_update = True
        else:
            for chain in self.chains:
                chain.removeBurn(ignore_frac)

    def deleteFixedParams(self):
        """
        Delete parameters that are fixed (the same value in all samples)
        """
        if self.samples is not None:
            fixed = WeightedSamples.deleteFixedParams(self)
            self.chains = None
        else:
            # Fixed columns are identified from the first chain only, then removed from every chain.
            fixed = []
            chain = self.chains[0]
            for i in range(chain.n):
                if np.all(chain.samples[:, i] == chain.samples[0, i]):
                    fixed.append(i)
            for chain in self.chains:
                chain.changeSamples(np.delete(chain.samples, fixed, 1))
        self.paramNames.deleteIndices(fixed)
        self._getParamIndices()

    def saveAsText(self, root, chain_index=None, make_dirs=False):
        """
        Saves the samples as text files, including parameter names as .paramnames file.

        :param root: The root name to use
        :param chain_index: Optional index to be used for the filename, zero based, e.g. for saving one
                            of multiple chains
        :param make_dirs: True if this should (recursively) create the directory if it doesn't exist
        """
        super(Chains, self).saveAsText(root, chain_index, make_dirs)
        # The .paramnames file is written when chain_index is None or 0.
        if not chain_index:
            self.paramNames.saveAsText(root + '.paramnames')

    def savePickle(self, filename):
        """
        Save the current object to a file in pickle format

        :param filename: The file to write to
        """
        with open(filename, 'wb') as output:
            pickle.dump(self, output, pickle.HIGHEST_PROTOCOL)
class Chains(WeightedSamples):
    """
    Holds one or more sets of weighted samples, for example a set of MCMC chains.
    Inherits from :class:`~.chains.WeightedSamples`, also adding parameter names and labels

    :ivar paramNames: a :class:`~.paramnames.ParamNames` instance holding the parameter names and labels
    """

    def __init__(self, root=None, jobItem=None, paramNamesFile=None, names=None, labels=None, renames=None,
                 sampler=None, **kwargs):
        """
        :param root: optional root name for files
        :param jobItem: optional jobItem for parameter grid item
        :param paramNamesFile: optional filename of a .paramnames files that holds parameter names
        :param names: optional list of names for the parameters
        :param labels: optional list of latex labels for the parameters
        :param renames: optional dictionary of parameter aliases
        :param sampler: string describing the type of samples (default :mcmc); if "nested" or "uncorrelated"
              the effective number of samples is calculated using uncorrelated approximation
        :param kwargs: extra options for :class:`~.chains.WeightedSamples`'s constructor
        :raises ValueError: if sampler is a string other than "mcmc", "nested" or "uncorrelated"
                            (case-insensitive)
        """
        # Set before the base constructor runs (presumably it may trigger code reading self.chains).
        self.chains = None
        WeightedSamples.__init__(self, **kwargs)
        self.jobItem = jobItem
        self.ignore_lines = float(kwargs.get('ignore_rows', 0))
        self.root = root
        if not paramNamesFile and root:
            # Look for <root>.paramnames first, then <root>__full.yaml (or <root>full.yaml for a directory root).
            mid = ('' if root.endswith("/") else "__")
            if os.path.exists(root + '.paramnames'):
                paramNamesFile = root + '.paramnames'
            elif os.path.exists(root + mid + 'full.yaml'):
                paramNamesFile = root + mid + 'full.yaml'
        self.setParamNames(paramNamesFile or names)
        if labels is not None:
            self.paramNames.setLabels(labels)
        if renames is not None:
            self.updateRenames(renames)
        # Sampler that generated the chain -- assume "mcmc"
        if isinstance(sampler, six.string_types):
            if sampler.lower() not in ["mcmc", "nested", "uncorrelated"]:
                raise ValueError("Unknown sampler type %s" % sampler)
            self.sampler = sampler.lower()
        elif isinstance(paramNamesFile, six.string_types) and paramNamesFile.endswith("yaml"):
            from getdist.yaml_format_tools import get_sampler_type
            self.sampler = get_sampler_type(paramNamesFile)
        else:
            self.sampler = "mcmc"

    def setParamNames(self, names=None):
        """
        Sets the names of the params.

        :param names: Either a :class:`~.paramnames.ParamNames` object, the name of a .paramnames file to load,
                      a list of name strings, otherwise use default names (param1, param2...).
        """
        self.paramNames = None
        if isinstance(names, ParamNames):
            self.paramNames = names
        elif isinstance(names, six.string_types):
            self.paramNames = ParamNames(names)
        elif names is not None:
            self.paramNames = ParamNames(names=names)
        elif self.samples is not None:
            self.paramNames = ParamNames(default=self.n)
        if self.paramNames:
            self._getParamIndices()
        self.needs_update = True

    def filter(self, where):
        """
        Filter the stored samples to keep only samples matching filter

        :param where: list of sample indices to keep, or boolean array filter
                      (e.g. x>5 to keep only samples where x>5)
        :raises ValueError: if the chains are still stored separately
        """
        if self.chains is None:
            if hasattr(self, 'chain_offsets'):
                # must update chain_offsets to be able to correctly split back into separate filtered chains if needed
                # NOTE(review): counting non-zero entries per slice matches a boolean mask; for a list of
                # sample indices the resulting offsets look unreliable - confirm.
                lens = [0]
                for off1, off2 in zip(self.chain_offsets[:-1], self.chain_offsets[1:]):
                    lens.append(np.count_nonzero(where[off1:off2]))
                self.chain_offsets = np.cumsum(np.array(lens))
            super(Chains, self).filter(where)
        else:
            raise ValueError('chains are separated, makeSingle first or call filter on individual chains')

    def getParamNames(self):
        """
        Get :class:`~.paramnames.ParamNames` object with names for the parameters

        :return: :class:`~.paramnames.ParamNames` object giving parameter names and labels
        """
        return self.paramNames

    def _getParamIndices(self):
        """
        Gets the indices of the params, and stores them in self.index.

        :return: A dict mapping the param name to the parameter index.
        :raises WeightedSampleError: if samples are loaded and their number of parameters differs
                                     from the number of names
        """
        if self.samples is not None and len(self.paramNames.names) != self.n:
            raise WeightedSampleError("paramNames size does not match number of parameters in samples")
        self.index = {name.name: i for i, name in enumerate(self.paramNames.names)}
        return self.index

    def getRenames(self):
        """
        Gets the renames held by this instance's parameter names object.

        :return: the result of the parameter names object's getRenames()
        """
        return self.paramNames.getRenames()

    def updateRenames(self, renames):
        """
        Updates the renames known to each parameter with the given dictionary of renames.

        :param renames: dictionary of renames, passed on to the parameter names object
        """
        self.paramNames.updateRenames(renames)

    def setParams(self, obj):
        """
        Adds array variables obj.name1, obj.name2 etc, where
        obj.name1 is the vector of samples with name 'name1'

        if a parameter name is of the form aa.bb.cc, it makes subobjects so you can reference obj.aa.bb.cc

        :param obj: The object instance to add the parameter vectors variables
        :return: The obj after alterations.
        """
        for i, name in enumerate(self.paramNames.names):
            path = name.name.split('.')
            ob = obj
            # Walk (creating where needed) the chain of sub-objects for dotted names.
            for p in path[:-1]:
                if not hasattr(ob, p):
                    setattr(ob, p, ParSamples())
                ob = getattr(ob, p)
            setattr(ob, path[-1], self.samples[:, i])
        return obj

    def getParams(self):
        """
        Creates a :class:`~.chains.ParSamples` object, with variables giving vectors for all the parameters,
        for example samples.getParams().name1 would be the vector of samples with name 'name1'

        :return: A :class:`~.chains.ParSamples` object containing all the parameter vectors, with attributes
                 given by the parameter names
        """
        pars = ParSamples()
        self.setParams(pars)
        return pars

    def getParamSampleDict(self, ix):
        """
        Returns a dictionary of parameter values for sample number ix

        :param ix: index of the sample
        :return: ordered dictionary mapping each parameter name to its value in sample ix, plus
                 'weight' and 'loglike' entries holding that sample's weight and loglike
                 (None where the corresponding array is not set)
        """
        from collections import OrderedDict
        res = OrderedDict()
        for i, name in enumerate(self.paramNames.names):
            res[name.name] = self.samples[ix, i]
        # Report this sample's own weight and loglike rather than the full arrays.
        res['weight'] = None if self.weights is None else self.weights[ix]
        res['loglike'] = None if self.loglikes is None else self.loglikes[ix]
        return res

    def _makeParamvec(self, par):
        """
        Gets the vector of samples for a parameter, refreshing the base statistics first if needed.

        :param par: a ParamInfo instance or parameter name string; anything else is passed on to
                    :class:`~.chains.WeightedSamples`'s implementation
        :return: the vector of sample values for the parameter
        """
        if self.needs_update:
            self.updateBaseStatistics()
        if isinstance(par, ParamInfo):
            par = par.name
        if isinstance(par, six.string_types):
            return self.samples[:, self.index[par]]
        return WeightedSamples._makeParamvec(self, par)

    def updateChainBaseStatistics(self):
        # old name, use updateBaseStatistics
        return self.updateBaseStatistics()

    def updateBaseStatistics(self):
        """
        Updates basic computed statistics for this chain, e.g. after any changes to the samples or weights

        :return: self after updating statistics.
        """
        self.getVars()
        self.mean_mult = self.norm / self.numrows
        self.max_mult = np.max(self.weights)
        self._getParamIndices()
        self.needs_update = False
        return self

    def addDerived(self, paramVec, name, **kwargs):
        """
        Adds a new parameter

        :param paramVec: The vector of parameter values to add.
        :param name: The name for the new parameter
        :param kwargs: arguments for paramnames' :func:`.paramnames.ParamList.addDerived`
        :return: The added parameter's :class:`~.paramnames.ParamInfo` object
        :raises ValueError: if a parameter with this name already exists
        """
        if self.paramNames.parWithName(name):
            raise ValueError('Parameter with name %s already exists' % name)
        self.changeSamples(np.c_[self.samples, paramVec])
        return self.paramNames.addDerived(name, **kwargs)

    def loadChains(self, root, files_or_samples, weights=None, loglikes=None, ignore_lines=None):
        """
        Loads chains from files or from arrays of samples.

        :param root: Root name
        :param files_or_samples: list of file names or list of arrays of samples, or single array of samples
        :param weights: if loading from arrays of samples, corresponding list of arrays of weights
        :param loglikes: if loading from arrays of samples, corresponding list of arrays of -2 log(likelihood)
        :param ignore_lines: Amount of lines at the start of the file to ignore,
                             None to use this instance's ignore_lines setting
        :return: True if loaded successfully
        :raises WeightedSampleError: if the input is empty or no chain could be loaded from the files
        :raises ValueError: if weights or loglikes are given when reading from files, or the samples
                            do not have 1, 2 or 3 dimensions
        """
        self.chains = []
        self.samples = None
        self.weights = None
        self.loglikes = None
        if ignore_lines is None:
            ignore_lines = self.ignore_lines
        WSkwargs = {"ignore_rows": ignore_lines, "min_weight_ratio": self.min_weight_ratio}
        # An empty list/array would otherwise fail with an IndexError on the [0] lookup below.
        if not isinstance(files_or_samples, six.string_types) and len(files_or_samples) == 0:
            raise WeightedSampleError('loadChains - no chains found for ' + str(root))
        if isinstance(files_or_samples, six.string_types) or isinstance(files_or_samples[0], six.string_types):
            # From files
            if weights is not None or loglikes is not None:
                raise ValueError('weights and loglikes not needed reading from file')
            if isinstance(files_or_samples, six.string_types):
                files_or_samples = [files_or_samples]
            self.name_tag = self.name_tag or os.path.basename(root)
            for fname in files_or_samples:
                if print_load_details:
                    print(fname)
                try:
                    self.chains.append(WeightedSamples(fname, **WSkwargs))
                except WeightedSampleError:
                    if print_load_details:
                        print('Ignored file %s (likely empty)' % fname)
            nchains = len(self.chains)
            if not nchains:
                raise WeightedSampleError('loadChains - no chains found for ' + root)
        else:
            # From arrays
            def array_dimension(a):
                # Dimension for numpy or list/tuple arrays: count how many times [0] can be applied.
                d = 0
                while True:
                    # Indexing a string returns a string forever, so stop descending at string elements.
                    if isinstance(a, six.string_types):
                        return d
                    try:
                        a = a[0]
                    except Exception:
                        # Not indexable (or empty): the innermost level has been reached.
                        return d
                    d += 1

            dim = array_dimension(files_or_samples)
            if dim in [1, 2]:
                # A single array of samples is stored directly rather than as separate chains.
                self.chains = None
                self.setSamples(slice_or_none(files_or_samples, ignore_lines),
                                slice_or_none(weights, ignore_lines),
                                slice_or_none(loglikes, ignore_lines), self.min_weight_ratio)
                if self.paramNames is None:
                    self.paramNames = ParamNames(default=self.n)
                nchains = 1
            elif dim == 3:
                for i, samples_i in enumerate(files_or_samples):
                    self.chains.append(WeightedSamples(
                        samples=samples_i,
                        loglikes=None if loglikes is None else np.atleast_2d(loglikes)[i],
                        weights=None if weights is None else np.atleast_2d(weights)[i], **WSkwargs))
                if self.paramNames is None:
                    self.paramNames = ParamNames(default=self.chains[0].n)
                nchains = len(self.chains)
            else:
                raise ValueError('samples or files must be array of samples, or a list of arrays or files')
        self._weightsChanged()
        return nchains > 0

    def getGelmanRubinEigenvalues(self, nparam=None, chainlist=None):
        """
        Assess convergence using var(mean)/mean(var) in the orthogonalized parameters
        c.f. Brooks and Gelman 1997.

        :param nparam: The number of parameters (starting at first), by default uses all of them
        :param chainlist: list of :class:`~.chains.WeightedSamples`, the samples to use.
                          Defaults to all the separate chains in this instance.
        :return: array of var(mean)/mean(var) for orthogonalized parameters, or None if the
                 mean covariance has a non-positive eigenvalue
        """
        if chainlist is None:
            chainlist = self.getSeparateChains()
        nparam = nparam or self.paramNames.numNonDerived()
        meanscov = np.zeros((nparam, nparam))
        means = self.getMeans()[:nparam]
        meancov = np.zeros(meanscov.shape)
        for chain in chainlist:
            diff = chain.getMeans()[:nparam] - means
            meanscov += np.outer(diff, diff)
            meancov += chain.getCov(nparam)
        meanscov /= (len(chainlist) - 1)
        meancov /= len(chainlist)
        w, U = np.linalg.eigh(meancov)
        if np.min(w) > 0:
            # Rescale the eigenvectors so that the mean covariance becomes the identity.
            U /= np.sqrt(w)
            D = np.linalg.eigvalsh(np.dot(U.T, meanscov).dot(U))
            return D
        else:
            return None

    def getGelmanRubin(self, nparam=None, chainlist=None):
        """
        Assess the convergence using the maximum var(mean)/mean(var) of orthogonalized parameters
        c.f. Brooks and Gelman 1997.

        :param nparam: The number of parameters, by default uses all
        :param chainlist: list of :class:`~.chains.WeightedSamples`, the samples to use. Defaults to all the
                          separate chains in this instance.
        :return: The worst var(mean)/mean(var) for orthogonalized parameters. Should be <<1 for good convergence.
        """
        return np.max(self.getGelmanRubinEigenvalues(nparam, chainlist))

    def makeSingle(self):
        """
        Combines separate chains into one samples array, so self.samples has all the samples
        and this instance can then be used as a general :class:`~.chains.WeightedSamples` instance.

        :return: self
        """
        self.chain_offsets = np.cumsum(np.array([0] + [chain.samples.shape[0] for chain in self.chains]))
        # NumPy's stacking functions need a real sequence; generators are rejected by current versions.
        # The first chain decides whether weights / loglikes are present at all.
        if self.chains[0].weights is None:
            weights = None
        else:
            weights = np.hstack([chain.weights for chain in self.chains])
        if self.chains[0].loglikes is None:
            loglikes = None
        else:
            loglikes = np.hstack([chain.loglikes for chain in self.chains])
        self.setSamples(np.vstack([chain.samples for chain in self.chains]), weights, loglikes, min_weight_ratio=-1)
        self.chains = None
        self.needs_update = True
        return self

    def getSeparateChains(self):
        """
        Gets a list of samples for separate chains.
        If the chains have already been combined, uses the stored sample offsets to reconstruct the array
        (generally no array copying)

        :return: The list of :class:`~.chains.WeightedSamples` for each chain.
        """
        if self.chains is not None:
            return self.chains
        chainlist = []
        for off1, off2 in zip(self.chain_offsets[:-1], self.chain_offsets[1:]):
            chainlist.append(WeightedSamples(samples=self.samples[off1:off2], weights=self.weights[off1:off2],
                                             loglikes=self.loglikes[off1:off2]))
        return chainlist

    def removeBurnFraction(self, ignore_frac):
        """
        Remove a fraction of the samples as burn in

        :param ignore_frac: fraction of sample points to remove from the start of the samples,
                            or each chain if not combined
        """
        if self.samples is not None:
            self.removeBurn(ignore_frac)
            self.chains = None
            self.needs_update = True
        else:
            for chain in self.chains:
                chain.removeBurn(ignore_frac)

    def deleteFixedParams(self):
        """
        Delete parameters that are fixed (the same value in all samples)
        """
        if self.samples is not None:
            fixed = WeightedSamples.deleteFixedParams(self)
            self.chains = None
        else:
            # Fixed columns are identified from the first chain only, then removed from every chain.
            fixed = []
            chain = self.chains[0]
            for i in range(chain.n):
                if np.all(chain.samples[:, i] == chain.samples[0, i]):
                    fixed.append(i)
            for chain in self.chains:
                chain.changeSamples(np.delete(chain.samples, fixed, 1))
        self.paramNames.deleteIndices(fixed)
        self._getParamIndices()

    def saveAsText(self, root, chain_index=None, make_dirs=False):
        """
        Saves the samples as text files, including parameter names as .paramnames file.

        :param root: The root name to use
        :param chain_index: Optional index to be used for the filename, zero based, e.g. for saving one
                            of multiple chains
        :param make_dirs: True if this should (recursively) create the directory if it doesn't exist
        """
        super(Chains, self).saveAsText(root, chain_index, make_dirs)
        # The .paramnames file is written when chain_index is None or 0.
        if not chain_index:
            self.paramNames.saveAsText(root + '.paramnames')

    def savePickle(self, filename):
        """
        Save the current object to a file in pickle format

        :param filename: The file to write to
        """
        with open(filename, 'wb') as output:
            pickle.dump(self, output, pickle.HIGHEST_PROTOCOL)
def loadChains(self, root, files_or_samples, weights=None, loglikes=None, ignore_lines=None):
    """
    Loads chains from files, or directly from arrays of samples.

    :param root: Root name
    :param files_or_samples: list of file names or list of arrays of samples, or single array of samples
    :param weights: if loading from arrays of samples, corresponding list of arrays of weights
    :param loglikes: if loading from arrays of samples, corresponding list of arrays of -2 log(likelihood)
    :param ignore_lines: Amount of lines at the start of the file to ignore, None to use self.ignore_lines
    :return: True if loaded successfully (an exception is raised if nothing could be loaded)
    :raises WeightedSampleError: if the input is empty or no chain file could be read
    :raises ValueError: if weights/loglikes are given together with file names, or the samples
                        do not have one, two or three levels of nesting
    """

    def array_dimension(a):
        # Nesting depth for numpy or list/tuple arrays, found by following first elements.
        # Strings end the descent: indexing a one-character string returns itself forever.
        d = 0
        while not isinstance(a, six.string_types):
            try:
                a = a[0]
            except (TypeError, IndexError, KeyError):
                break
            d += 1
        return d

    def per_chain(values):
        # A single 1D array is treated as belonging to one chain; a list of arrays is indexed
        # directly so that chains of different lengths (ragged input) are supported.
        if values is None or array_dimension(values) >= 2:
            return values
        return [values]

    self.chains = []
    self.samples = None
    self.weights = None
    self.loglikes = None
    if ignore_lines is None:
        ignore_lines = self.ignore_lines
    WSkwargs = {"ignore_rows": ignore_lines, "min_weight_ratio": self.min_weight_ratio}
    single_name = isinstance(files_or_samples, six.string_types)
    if not single_name and len(files_or_samples) == 0:
        # Nothing to load: report it explicitly instead of failing on files_or_samples[0]
        raise WeightedSampleError('loadChains - no chains found for %s' % root)
    if single_name or isinstance(files_or_samples[0], six.string_types):
        # From files
        if weights is not None or loglikes is not None:
            raise ValueError('weights and loglikes not needed reading from file')
        if single_name:
            files_or_samples = [files_or_samples]
        self.name_tag = self.name_tag or os.path.basename(root)
        for fname in files_or_samples:
            if print_load_details:
                print(fname)
            try:
                self.chains.append(WeightedSamples(fname, **WSkwargs))
            except WeightedSampleError:
                # Unreadable files are skipped; an error is only raised if none load
                if print_load_details:
                    print('Ignored file %s (likely empty)' % fname)
        nchains = len(self.chains)
        if not nchains:
            raise WeightedSampleError('loadChains - no chains found for ' + root)
    else:
        # From arrays
        dim = array_dimension(files_or_samples)
        if dim in [1, 2]:
            # A single array of samples: store it directly, with no per-chain list
            self.chains = None
            self.setSamples(slice_or_none(files_or_samples, ignore_lines),
                            slice_or_none(weights, ignore_lines),
                            slice_or_none(loglikes, ignore_lines), self.min_weight_ratio)
            if self.paramNames is None:
                self.paramNames = ParamNames(default=self.n)
            nchains = 1
        elif dim == 3:
            # A list of sample arrays, one per chain
            chain_weights = per_chain(weights)
            chain_loglikes = per_chain(loglikes)
            for i, samples_i in enumerate(files_or_samples):
                self.chains.append(WeightedSamples(
                    samples=samples_i,
                    loglikes=None if chain_loglikes is None else np.asarray(chain_loglikes[i]),
                    weights=None if chain_weights is None else np.asarray(chain_weights[i]),
                    **WSkwargs))
            if self.paramNames is None:
                self.paramNames = ParamNames(default=self.chains[0].n)
            nchains = len(self.chains)
        else:
            raise ValueError('samples or files must be array of samples, or a list of arrays or files')
    self._weightsChanged()
    return nchains > 0