def test_logsumexpinf():
    """Compare ``logsumexp`` with scipy's version when infinities appear.

    With finite weights the two implementations must agree exactly, even
    when one of the log-terms is ``-inf``.  When the matching weight is
    ``-inf`` as well, scipy's result is ``nan`` whereas ``logsumexp`` is
    required to stay finite.
    """
    np.random.seed(0)
    log_terms = np.random.rand(10)
    factors = np.random.rand(10)

    # Scalar -inf term paired with a -inf weight.
    assert logsumexp(-np.inf, b=[-np.inf]) == -np.inf

    # All-finite input: exact agreement with scipy.
    assert logsumexp(log_terms, b=factors) == sp.logsumexp(log_terms,
                                                           b=factors)

    # A -inf log-term with a finite weight: still exact agreement.
    log_terms[0] = -np.inf
    assert logsumexp(log_terms, b=factors) == sp.logsumexp(log_terms,
                                                           b=factors)

    # A -inf log-term with a -inf weight: scipy evaluates 0 * inf.
    factors[0] = -np.inf
    with warnings.catch_warnings():
        # Silence the RuntimeWarning scipy emits for the product above.
        warnings.filterwarnings('ignore',
                                'invalid value encountered in multiply',
                                RuntimeWarning)
        assert np.isnan(sp.logsumexp(log_terms, b=factors))
    assert np.isfinite(logsumexp(log_terms, b=factors))
def dlogX(self, nsamples=None):
    """Compute volume of shell of loglikelihood.

    Parameters
    ----------
    nsamples: int or array-like, optional
        Number of samples to generate.
        If None, then compute the statistical average.
        If integer, generate samples from the distribution.
        If it has one or more dimensions, it is returned unchanged.
        (Default: None)

    Returns
    -------
    WeightedSeries if nsamples is None, WeightedDataFrame if nsamples is
    an integer, or nsamples itself if it has one or more dimensions.
    """
    # Anything with a dimension is handed straight back to the caller.
    if np.ndim(nsamples) > 0:
        return nsamples

    averaged = nsamples is None
    if averaged:
        log_t = np.log(self.nlive / (self.nlive + 1)).to_frame()
    else:
        # One column of random draws per requested sample.
        draws = np.log(np.random.rand(len(self), nsamples))
        log_t = pandas.DataFrame(draws, self.index).divide(self.nlive,
                                                           axis=0)

    log_volume = log_t.cumsum()
    previous = log_volume.shift(1, fill_value=0)
    following = log_volume.shift(-1, fill_value=-np.inf)

    # log((exp(previous) - exp(following)) / 2), evaluated in log space.
    terms = [previous.to_numpy(), following.to_numpy()]
    signs = [np.ones_like(previous), -np.ones_like(following)]
    shell = logsumexp(terms, b=signs, axis=0) - np.log(2)

    if averaged:
        # Collapse the single column into a one-dimensional series.
        return WeightedSeries(np.squeeze(shell), self.index,
                              weights=self.weights)
    return WeightedDataFrame(shell, self.index, weights=self.weights)
def logZ(self, nsamples=None):
    """Log-Evidence.

    - If nsamples is not supplied, return mean log evidence
    - If nsamples is integer, return nsamples from the distribution
    - If nsamples is array, use nsamples as volumes of evidence shells
    """
    shell_volumes = self.dlogX(nsamples)
    # Log-weight of each shell: log-volume plus beta-scaled loglikelihood.
    scaled_logL = self.beta * self.logL
    log_weights = shell_volumes.add(scaled_logL, axis=0)
    return logsumexp(log_weights, axis=0)
def merge_samples_weighted(samples, weights=None):
    r"""Merge sets of samples with weights.

    Combine two (or more) samples so the new PDF is
    P(x|new) = weight_A P(x|A) + weight_B P(x|B).
    The number of samples and internal weights do not affect the result.

    Parameters
    ----------
    samples: list(NestedSamples) or list(MCMCSamples)
        List or array-like of one or more MCMC or nested sampling runs.

    weights: list(double) or None
        Weight for each run in samples (normalized internally).
        Can be omitted if samples are NestedSamples,
        then exp(logZ) is used as weight.

    Returns
    -------
    new_samples: MCMCSamples
        Merged (weighted) run.

    Raises
    ------
    TypeError
        If samples is not a Sequence.
    ValueError
        If weights is omitted and an element of samples has no ``logZ``,
        or if weights and samples differ in length.
    """
    if not isinstance(samples, Sequence):
        raise TypeError("samples must be a Sequence (list of samples).")

    # Work on deep copies so the caller's runs are never modified.
    mcmc_samples = copy.deepcopy([MCMCSamples(s) for s in samples])

    if weights is None:
        try:
            logZs = np.array([s.logZ() for s in samples])
        except AttributeError as err:
            raise ValueError("If samples includes MCMCSamples "
                             "then weights must be given.") from err
        # Subtract logsumexp to avoid numerical issues (similar to max(logZs))
        logZs -= logsumexp(logZs)
        weights = np.exp(logZs)
    elif len(weights) != len(samples):
        # Single formatted message, so str(err) is readable rather than
        # the repr of a tuple of arguments.
        raise ValueError(
            "samples and weights must have the same length, "
            "each weight is for a whole sample. Currently "
            "%d samples and %d weights."
            % (len(samples), len(weights)))

    # The normalisation of the per-run weights is the same for every run.
    total_weight = np.sum(weights)

    new_samples = MCMCSamples()
    for s, w in zip(mcmc_samples, weights):
        # Normalize the run's internal weights, then scale by the
        # normalized weight of the run as a whole.
        new_weights = s.weights / s.weights.sum() * w / total_weight
        s = MCMCSamples(s, weights=new_weights)
        new_samples = new_samples.append(s)

    # Rescale so that the largest weight is one.
    new_samples.weights /= new_samples.weights.max()

    return new_samples
def D(self, nsamples=None):
    """Kullback-Leibler divergence.

    - If nsamples is not supplied, return mean KL divergence
    - If nsamples is integer, return nsamples from the distribution
    - If nsamples is array, use nsamples as volumes of evidence shells
    """
    shell_volumes = self.dlogX(nsamples)
    log_evidence = self.logZ(shell_volumes)
    scaled_logL = self.beta * self.logL

    # Log of the evidence-normalised weight of each shell.
    log_weights = shell_volumes.add(scaled_logL, axis=0) - log_evidence
    # beta * logL - logZ, laid out like shell_volumes.
    info = (shell_volumes * 0).add(scaled_logL, axis=0) - log_evidence

    # sum(exp(log_weights) * info), evaluated via a signed logsumexp.
    log_divergence = logsumexp(log_weights, b=info, axis=0)
    return np.exp(log_divergence)
def ns_output(self, nsamples=200):
    """Compute Bayesian global quantities.

    Using nested sampling we can compute the evidence (logZ),
    Kullback-Leibler divergence (D) and Bayesian model dimensionality (d).
    More precisely, we can infer these quantities via their probability
    distribution.

    Parameters
    ----------
    nsamples: int, optional
        number of samples to generate (Default: 200)

    Returns
    -------
    MCMCSamples
        Samples from the P(logZ, D, d) distribution, with columns
        'logZ', 'D' and 'd'.
    """
    shell_volumes = self.dlogX(nsamples)
    scaled_logL = self.beta * self.logL

    samples = MCMCSamples(index=shell_volumes.columns)
    samples['logZ'] = self.logZ(shell_volumes)

    # Log of the evidence-normalised weight of each shell.
    log_weights = shell_volumes.add(scaled_logL, axis=0)
    log_weights -= samples.logZ

    # beta * logL - logZ, laid out like shell_volumes.
    info = (shell_volumes * 0).add(scaled_logL, axis=0) - samples.logZ

    # D is the weighted mean of info; d is twice its weighted variance.
    samples['D'] = np.exp(logsumexp(log_weights, b=info, axis=0))
    spread = (info - samples.D)**2
    samples['d'] = np.exp(logsumexp(log_weights, b=spread, axis=0)) * 2

    samples.tex = {
        'logZ': r'$\log\mathcal{Z}$',
        'D': r'$\mathcal{D}$',
        'd': r'$d$'
    }
    samples.label = self.label
    return samples
def d(self, nsamples=None):
    """Bayesian model dimensionality.

    - If nsamples is not supplied, return mean BMD
    - If nsamples is integer, return nsamples from the distribution
    - If nsamples is array, use nsamples as volumes of evidence shells
    """
    shell_volumes = self.dlogX(nsamples)
    log_evidence = self.logZ(shell_volumes)
    divergence = self.D(shell_volumes)
    scaled_logL = self.beta * self.logL

    # Log of the evidence-normalised weight of each shell.
    log_weights = shell_volumes.add(scaled_logL, axis=0) - log_evidence
    # beta * logL - logZ, laid out like shell_volumes.
    info = (shell_volumes * 0).add(scaled_logL, axis=0) - log_evidence

    # Twice the weighted mean of the squared deviation of info from D.
    spread = (info - divergence)**2
    return np.exp(logsumexp(log_weights, b=spread, axis=0)) * 2
def dlogX(self, nsamples=None):
    """Compute volume of shell of loglikelihood.

    Parameters
    ----------
    nsamples: int, optional
        Number of samples to generate.
        If None, then compute the statistical average.
        If integer, generate samples from the distribution.
        (Default: None)

    Returns
    -------
    WeightedSeries if nsamples is None, otherwise WeightedDataFrame.
    """
    log_volume = self.logX(nsamples)
    previous = log_volume.shift(1, fill_value=0)
    following = log_volume.shift(-1, fill_value=-np.inf)

    # log((exp(previous) - exp(following)) / 2), evaluated in log space.
    terms = [previous.values, following.values]
    signs = [np.ones_like(previous), -np.ones_like(following)]
    shell = logsumexp(terms, b=signs, axis=0) - np.log(2)

    if nsamples is None:
        # Drop length-one axes before building the series.
        return WeightedSeries(np.squeeze(shell), self.index,
                              weights=self.weights)
    return WeightedDataFrame(shell, self.index, weights=self.weights)