def test_post_prior(tmpdir):
    # Generate original chain
    info: InputDict = {
        "output": os.path.join(tmpdir, "gaussian"), "force": True,
        "params": info_params, "sampler": info_sampler,
        "likelihood": {"one": None}, "prior": {"gaussian": sampled_pdf}}
    info_post: InputDict = {
        "output": info["output"], "force": True,
        "post": {"suffix": "foo", "skip": 0.1,
                 "remove": {"prior": {"gaussian": None}},
                 "add": {"prior": {"target": target_pdf_prior}}}}
    _, sampler = run(info)
    if mpi.is_main_process():
        mcsamples_in = loadMCSamples(info["output"], settings={"ignore_rows": 0.1})
        target_mean, target_cov = mpi.share(_get_targets(mcsamples_in))
    else:
        target_mean, target_cov = mpi.share()
    for mem in [False, True]:
        post(info_post, sample=sampler.products()["sample"] if mem else None)
        # Load with GetDist and compare
        if mpi.is_main_process():
            mcsamples = loadMCSamples(
                info_post["output"] + _post_ + info_post["post"]["suffix"])
            new_mean = mcsamples.mean(["a", "b"])
            new_cov = mcsamples.getCovMat().matrix
            mpi.share((new_mean, new_cov))
        else:
            new_mean, new_cov = mpi.share()
        assert np.allclose(new_mean, target_mean)
        assert np.allclose(new_cov, target_cov)
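
# ``_get_targets`` is a helper defined elsewhere in this test module. As an
# illustration only -- a hypothetical sketch, not the module's actual
# implementation -- it could compute the comparison targets the same way the
# loop above computes the post-processed ones:
def _get_targets_sketch(mcsamples):
    # Mean of the sampled parameters "a" and "b", plus the full parameter
    # covariance, both taken straight from GetDist's MCSamples API.
    return mcsamples.mean(["a", "b"]), mcsamples.getCovMat().matrix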
def check_convergence_and_learn_proposal(self):
    """
    Checks the convergence of the sampling process, and, if requested,
    learns a new covariance matrix for the proposal distribution from the
    covariance of the last samples.
    """
    # Compute Rminus1 of means
    self.been_waiting = 0
    if more_than_one_process():
        # Compute and gather means and covs
        use_first = int(self.n() / 2)
        mean = self.collection.mean(first=use_first)
        cov = self.collection.cov(first=use_first)
        acceptance_rate = self.get_acceptance_rate(use_first)
        Ns, means, covs, acceptance_rates = mpi.array_gather(
            [self.n(), mean, cov, acceptance_rate])
    else:
        # Compute and gather means, covs and CL intervals of last m-1 chain fractions
        m = 1 + self.Rminus1_single_split
        cut = int(len(self.collection) / m)
        try:
            acceptance_rate = self.get_acceptance_rate(cut)
            Ns = np.ones(m - 1) * cut
            means = np.array([
                self.collection.mean(first=i * cut, last=(i + 1) * cut - 1)
                for i in range(1, m)])
            covs = np.array([
                self.collection.cov(first=i * cut, last=(i + 1) * cut - 1)
                for i in range(1, m)])
        except always_stop_exceptions:
            raise
        except Exception:
            self.log.info("Not enough points in chain to check convergence. "
                          "Waiting for next checkpoint.")
            return
        acceptance_rates = None
    if is_main_process():
        self.progress.at[self.i_learn, "N"] = sum(Ns)
        self.progress.at[self.i_learn, "timestamp"] = \
            datetime.datetime.now().isoformat()
        acceptance_rate = (np.average(acceptance_rates, weights=Ns)
                           if acceptance_rates is not None else acceptance_rate)
        self.log.info(
            " - Acceptance rate: %.3f" +
            (" = avg(%r)" % list(acceptance_rates)
             if acceptance_rates is not None else ""), acceptance_rate)
        self.progress.at[self.i_learn, "acceptance_rate"] = acceptance_rate
        # "Within" or "W" term -- our "units" for assessing convergence
        # and our prospective new covariance matrix
        mean_of_covs = np.average(covs, weights=Ns, axis=0)
        # "Between" or "B" term
        # We don't weight with the number of samples in the chains here:
        # shorter chains will likely be outliers, and we want to notice them
        cov_of_means = np.atleast_2d(np.cov(means.T))  # , fweights=Ns)
        # For numerical stability, we turn mean_of_covs into correlation matrix:
        #   rho = (diag(Sigma))^(-1/2) * Sigma * (diag(Sigma))^(-1/2)
        # and apply the same transformation to the mean of covs (same eigenvals!)
        d = np.sqrt(np.diag(cov_of_means))
        corr_of_means = (cov_of_means / d).T / d
        norm_mean_of_covs = (mean_of_covs / d).T / d
        success_means = False
        converged_means = False
        # Cholesky of (normalized) mean of covs, then eigvals of
        # Linv * corr_of_means * Linv^T
        try:
            L = np.linalg.cholesky(norm_mean_of_covs)
        except np.linalg.LinAlgError:
            self.log.warning(
                "Negative covariance eigenvalues. "
                "This may mean that the covariance of the samples does not "
                "contain enough information at this point. "
                "Skipping learning a new covmat for now.")
        else:
            Linv = np.linalg.inv(L)
            try:
                eigvals = np.linalg.eigvalsh(Linv.dot(corr_of_means).dot(Linv.T))
                success_means = True
            except np.linalg.LinAlgError:
                self.log.warning("Could not compute eigenvalues. "
                                 "Skipping learning a new covmat for now.")
            else:
                Rminus1 = max(np.abs(eigvals))
                self.progress.at[self.i_learn, "Rminus1"] = Rminus1
                # For real square matrices, a possible def of the cond number is:
                condition_number = Rminus1 / min(np.abs(eigvals))
                self.log.debug(" - Condition number = %g", condition_number)
                self.log.debug(" - Eigenvalues = %r", eigvals)
                self.log.info(
                    " - Convergence of means: R-1 = %f after %d accepted steps" %
                    (Rminus1, sum(Ns)) +
                    (" = sum(%r)" % list(Ns) if more_than_one_process() else ""))
                # Have we converged in means?
                # (criterion must be fulfilled twice in a row)
                converged_means = max(Rminus1, self.Rminus1_last) < self.Rminus1_stop
    else:
        mean_of_covs = None
        success_means = None
        converged_means = False
        Rminus1 = None
    success_means, converged_means = mpi.share((success_means, converged_means))
    # Check the convergence of the bounds of the confidence intervals
    # Same as R-1, but with the rms deviation from the mean bound
    # in units of the mean standard deviation of the chains
    if converged_means:
        if more_than_one_process():
            mcsamples = self.collection.sampled_to_getdist_mcsamples(first=use_first)
            try:
                bound = np.array([[
                    mcsamples.confidence(i, limfrac=self.Rminus1_cl_level / 2.,
                                         upper=which)
                    for i in range(self.model.prior.d())]
                    for which in [False, True]]).T
                success_bounds = True
            except Exception:
                bound = None
                success_bounds = False
            bounds = np.array(mpi.gather(bound))
        else:
            try:
                mcsamples_list = [
                    self.collection.sampled_to_getdist_mcsamples(
                        first=i * cut, last=(i + 1) * cut - 1)
                    for i in range(1, m)]
            except always_stop_exceptions:
                raise
            except Exception:
                self.log.info("Not enough points in chain to check c.l. convergence. "
                              "Waiting for next checkpoint.")
                return
            try:
                bounds = [
                    np.array([[
                        mcs.confidence(i, limfrac=self.Rminus1_cl_level / 2.,
                                       upper=which)
                        for i in range(self.model.prior.d())]
                        for which in [False, True]]).T
                    for mcs in mcsamples_list]
                success_bounds = True
            except Exception:
                bounds = None
                success_bounds = False
        if is_main_process():
            if success_bounds:
                Rminus1_cl = (np.std(bounds, axis=0).T /
                              np.sqrt(np.diag(mean_of_covs)))
                self.log.debug(" - normalized std's of bounds = %r", Rminus1_cl)
                Rminus1_cl = np.max(Rminus1_cl)
                self.progress.at[self.i_learn, "Rminus1_cl"] = Rminus1_cl
                self.log.info(
                    " - Convergence of bounds: R-1 = %f after %d " %
                    (Rminus1_cl,
                     (sum(Ns) if more_than_one_process() else self.n())) +
                    "accepted steps" +
                    (" = sum(%r)" % list(Ns) if more_than_one_process() else ""))
                if Rminus1_cl < self.Rminus1_cl_stop:
                    self.converged = True
                    self.log.info("The run has converged!")
                    self._Ns = Ns
            else:
                self.log.info("Computation of the bounds was not possible. "
                              "Waiting until the next convergence check.")
    # Broadcast and save the convergence status and the last R-1 of means
    if success_means:
        self.Rminus1_last, self.converged = mpi.share(
            (Rminus1, self.converged) if is_main_process() else None)
        # Do we want to learn a better proposal pdf?
        if self.learn_proposal and not self.converged:
            good_Rminus1 = (self.learn_proposal_Rminus1_max >
                            self.Rminus1_last > self.learn_proposal_Rminus1_min)
            if not good_Rminus1:
                self.mpi_info("Convergence less than requested for updates: "
                              "waiting until the next convergence check.")
                return
            mean_of_covs = mpi.share(mean_of_covs)
            try:
                self.proposer.set_covariance(mean_of_covs)
                self.mpi_info(" - Updated covariance matrix of proposal pdf.")
                self.mpi_debug("%r", mean_of_covs)
            except Exception:
                self.mpi_debug("Updating covariance matrix failed unexpectedly. "
                               "Waiting until next covmat learning attempt.")
    # Save checkpoint info
    self.write_checkpoint()
def test_post_likelihood():
    """
    Swaps likelihood "gaussian" for "target".

    It also tests aggregated chi2's, by removing and adding a likelihood
    to an existing type.
    """
    # Generate original chain
    # NB: "inteveral" (sic) matches the attribute name in OutputOptions
    orig_interval = OutputOptions.output_inteveral_s
    try:
        OutputOptions.output_inteveral_s = 0
        info_params_local = deepcopy(info_params)
        info_params_local["dummy"] = 0
        dummy_loglike_add = 0.1
        dummy_loglike_remove = 0.01
        info = {
            "output": None, "force": True,
            "params": info_params_local, "sampler": info_sampler,
            "likelihood": {
                "gaussian": {"external": sampled_pdf, "type": "A"},
                "dummy": {"external": lambda dummy: 1, "type": "BB"},
                "dummy_remove": {"external": lambda dummy: dummy_loglike_add,
                                 "type": "BB"}}}
        info_out, sampler = run(info)
        samples_in = mpi.gather(sampler.products()["sample"])
        if mpi.is_main_process():
            mcsamples_in = MCSamplesFromCobaya(info_out, samples_in)
        else:
            mcsamples_in = None
        info_out.update({
            "post": {
                "suffix": "foo",
                "remove": {"likelihood": {"gaussian": None,
                                          "dummy_remove": None}},
                "add": {"likelihood": {
                    "target": {"external": target_pdf, "type": "A",
                               "output_params": ["cprime"]},
                    "dummy_add": {"external": lambda dummy: dummy_loglike_remove,
                                  "type": "BB"}}}}})
        info_post_out, products_post = post(info_out, sampler.products()["sample"])
        samples = mpi.gather(products_post["sample"])
        # Load with GetDist and compare
        if mcsamples_in:
            target_mean, target_cov = mpi.share(_get_targets(mcsamples_in))
            mcsamples = MCSamplesFromCobaya(info_post_out, samples,
                                            name_tag="sample")
            new_mean = mcsamples.mean(["a", "b"])
            new_cov = mcsamples.getCovMat().matrix
            mpi.share((new_mean, new_cov))
        else:
            target_mean, target_cov = mpi.share()
            new_mean, new_cov = mpi.share()
        assert np.allclose(new_mean, target_mean)
        assert np.allclose(new_cov, target_cov)
        assert allclose(products_post["sample"]["chi2__A"],
                        products_post["sample"]["chi2__target"])
        assert allclose(products_post["sample"]["chi2__BB"],
                        products_post["sample"]["chi2__dummy"] +
                        products_post["sample"]["chi2__dummy_add"])
    finally:
        OutputOptions.output_inteveral_s = orig_interval
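
# The two aggregated-chi2 assertions above rely on Cobaya's convention that,
# for each likelihood "type" label T, the sample column ``chi2__T`` is the sum
# of the chi2's of all likelihoods tagged with that type. Spelled out for the
# post-processed chain above:
#
#   chi2__A  == chi2__target                    # "target" is the only type-A one
#   chi2__BB == chi2__dummy + chi2__dummy_add   # both are tagged type "BB"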
def body_of_test(dim, tmpdir=None, random_state=None):
    mindim = 4
    assert dim > mindim, "Needs dimension > %d for the test." % mindim
    if mpi.is_main_process():
        random_state = np.random.default_rng(random_state)
        i_s = list(range(dim))
        random_state.shuffle(i_s)
        initial_random_covmat = random_cov(dim * [[0, 1]],
                                           random_state=random_state)
        mpi.share((i_s, initial_random_covmat))
    else:
        i_s, initial_random_covmat = mpi.share()
    n_altered = int(dim / 4)
    i_proposal = i_s[:n_altered]
    i_ref = i_s[n_altered:2 * n_altered]
    i_prior = i_s[2 * n_altered:3 * n_altered]
    removed = list(chain(*(i_proposal, i_ref, i_prior)))
    i_covmat = [i for i in range(dim) if i not in removed]
    for i in removed:
        diag = initial_random_covmat[i, i]
        initial_random_covmat[:, i] = 0
        initial_random_covmat[i, :] = 0
        initial_random_covmat[i, i] = diag
    # Prepare info, including refs, priors and reduced covmat
    prefix = "a_"
    if mpi.is_main_process():
        input_order = list(range(dim))
        random_state.shuffle(input_order)
    else:
        input_order = None
    input_order = mpi.share(input_order)
    info: InputDict = {"likelihood": {"one": None}, "params": {}}
    for i in input_order:
        p = prefix + str(i)
        info["params"][p] = {"prior": {"dist": "norm", "loc": 0, "scale": 1000}}
        sigma = np.sqrt(initial_random_covmat[i, i])
        if i in i_proposal:
            info["params"][p]["proposal"] = sigma
        elif i in i_ref:
            info["params"][p]["ref"] = {"dist": "norm", "scale": sigma}
        elif i in i_prior:
            info["params"][p]["prior"]["scale"] = sigma
    reduced_covmat = initial_random_covmat[np.ix_(i_covmat, i_covmat)]
    reduced_covmat_params = [prefix + str(i) for i in i_covmat]
    info["sampler"] = {"mcmc": {}}
    if tmpdir:
        filename = os.path.join(str(tmpdir), "mycovmat.dat")
        header = " ".join(reduced_covmat_params)
        np.savetxt(filename, reduced_covmat, header=header)
        info["sampler"]["mcmc"]["covmat"] = str(filename)
    else:
        info["sampler"]["mcmc"]["covmat_params"] = reduced_covmat_params
        info["sampler"]["mcmc"]["covmat"] = reduced_covmat
    to_compare = initial_random_covmat[np.ix_(input_order, input_order)]

    def callback(sampler):
        assert np.allclose(to_compare, sampler.proposer.get_covariance())

    info["sampler"]["mcmc"].update({
        "callback_function": callback, "callback_every": 1,
        "max_samples": 1, "burn_in": 0})
    run(info)
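
# ``random_cov`` above belongs to the code under test. Purely as an
# illustration -- a hypothetical stand-in, not Cobaya's implementation -- a
# random positive-definite covariance for parameters with given ranges can be
# built from a random correlation matrix scaled by range-sized std devs:
def random_cov_sketch(ranges, random_state):
    import numpy as np
    d = len(ranges)
    widths = np.array([upper - lower for lower, upper in ranges])
    A = random_state.standard_normal((d, d))
    corr = A @ A.T                                   # positive (semi-)definite
    dd = np.sqrt(np.diag(corr))
    corr = (corr / dd).T / dd                        # normalize to correlation
    sigmas = 0.1 * widths * random_state.random(d)   # arbitrary scale choice
    return corr * np.outer(sigmas, sigmas)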
def body_of_test(info_logpdf, kind, tmpdir, derived=False, manual=False):
    rand = mpi.share(random())
    prefix = os.path.join(tmpdir, "%d" % round(1e8 * rand)) + os.sep
    if mpi.is_main_process():
        if os.path.exists(prefix):
            shutil.rmtree(prefix)
    # build updated info
    info = {
        "output": prefix,
        "params": {
            "x": {"prior": {"min": 0, "max": 1}, "proposal": 0.05},
            "y": {"prior": {"min": -1, "max": 1}, "proposal": 0.05}},
        "sampler": {
            "mcmc": {"max_samples": (10 if not manual else 5000),
                     "learn_proposal": False}}}
    if derived:
        info["params"].update({
            "r": {"min": 0, "max": 1},
            "theta": {"min": -0.5, "max": 0.5}})
    # Complete according to kind
    if kind == "prior":
        info.update({"prior": info_logpdf, "likelihood": {"one": None}})
    elif kind == "likelihood":
        info.update({"likelihood": info_logpdf})
    else:
        raise ValueError("Kind of test not known.")
    # If there is an ext function that is not a string, don't write output!
    stringy = {k: v for k, v in info_logpdf.items() if isinstance(v, str)}
    if stringy != info_logpdf:
        info.pop("output")
    # Run
    updated_info, sampler = run(info)
    products = sampler.products()
    # Test values
    logprior_base = -np.log(
        (info["params"]["x"]["prior"]["max"] -
         info["params"]["x"]["prior"]["min"]) *
        (info["params"]["y"]["prior"]["max"] -
         info["params"]["y"]["prior"]["min"]))
    logps = {
        name: logpdf(**{arg: products["sample"][arg].values
                        for arg in getfullargspec(logpdf).args})
        for name, logpdf in {"half_ring": half_ring_func,
                             "gaussian_y": gaussian_func}.items()}
    # Test #1: values of the logpdfs
    if kind == "prior":
        columns_priors = [c for c in products["sample"].data.columns
                          if c.startswith("minuslogprior")]
        assert np.allclose(
            products["sample"][columns_priors[0]].values,
            np.sum(products["sample"][columns_priors[1:]].values, axis=-1)), (
            "The single prior values do not add up to the total one.")
        assert np.allclose(
            logprior_base + sum(logps[p] for p in info_logpdf),
            -products["sample"]["minuslogprior"].values), (
            "The value of the total prior is not reproduced correctly.")
        assert np.isclose(
            sampler.model.logprior({"x": products["sample"]["x"][0],
                                    "y": products["sample"]["y"][0]}),
            -products["sample"]["minuslogprior"][0]), (
            "The value of the total prior is not reproduced from model.logprior.")
    elif kind == "likelihood":
        for lik in info["likelihood"]:
            assert np.allclose(
                -2 * logps[lik],
                products["sample"][get_chi2_name(lik)].values), (
                "The value of the likelihood '%s' is not reproduced correctly."
                % lik)
    assert np.allclose(
        logprior_base + sum(logps[p] for p in info_logpdf),
        -products["sample"]["minuslogpost"].values), (
        "The value of the posterior is not reproduced correctly.")
    # Test derived parameters, if present -- for now just for "r"
    if derived:
        derived_values = {
            param: func(**{arg: products["sample"][arg].values
                           for arg in ["x", "y"]})
            for param, func in derived_funcs.items()}
        assert all(
            np.allclose(v, products["sample"][p].values)
            for p, v in derived_values.items()), (
            "The value of the derived parameters is not reproduced correctly.")
    # Test updated info -- scripted
    if kind == "prior":
        assert info["prior"] == updated_info["prior"], (
            "The prior information has not been updated correctly.")
    elif kind == "likelihood":
        # Transform the likelihood info to the "external" convention and add defaults
        info_likelihood = deepcopy(info["likelihood"])
        for lik, value in list(info_likelihood.items()):
            if not hasattr(value, "get"):
                info_likelihood[lik] = {"external": value}
            info_likelihood[lik].update(
                {k: v for k, v in Likelihood.get_defaults().items()
                 if k not in info_likelihood[lik]})
            for k in ["input_params", "output_params"]:
                info_likelihood[lik].pop(k, None)
                updated_info["likelihood"][lik].pop(k)
        assert info_likelihood == updated_info["likelihood"], (
            "The likelihood information has not been updated correctly\n"
            " %r vs %r" % (info_likelihood, updated_info["likelihood"]))
    # Test updated info -- yaml
    # For now, only if ALL external pdfs are given as strings,
    # since the YAML load fails otherwise
    if stringy == info_logpdf:
        updated_output_file = os.path.join(prefix, FileSuffix.updated + ".yaml")
        with open(updated_output_file) as updated:
            updated_yaml = yaml_load("".join(updated.readlines()))
        for k, v in stringy.items():
            to_test = updated_yaml[kind][k]
            if kind == "likelihood":
                to_test = to_test["external"]
            assert to_test == info_logpdf[k], (
                "The updated external pdf info has not been written correctly.")
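
# ``derived_funcs`` is defined elsewhere in this test module. Given the ranges
# declared above (r in [0, 1], theta in [-0.5, 0.5]) and the (x, y) arguments,
# a plausible sketch -- hypothetical, not necessarily the module's exact
# definitions -- is the polar transform of (x, y), with theta in units of pi:
derived_funcs_sketch = {
    "r": lambda x, y: np.sqrt(x ** 2 + y ** 2),
    "theta": lambda x, y: np.arctan2(y, x) / np.pi,
}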