Example #1
def test_post_prior(tmpdir):
    # Generate original chain
    info: InputDict = {
        "output": os.path.join(tmpdir, "gaussian"), "force": True,
        "params": info_params, "sampler": info_sampler,
        "likelihood": {"one": None}, "prior": {"gaussian": sampled_pdf}}
    info_post: InputDict = {
        "output": info["output"], "force": True,
        "post": {"suffix": "foo", 'skip': 0.1,
                 "remove": {"prior": {"gaussian": None}},
                 "add": {"prior": {"target": target_pdf_prior}}}}
    _, sampler = run(info)
    if mpi.is_main_process():
        mcsamples_in = loadMCSamples(info["output"], settings={'ignore_rows': 0.1})
        target_mean, target_cov = mpi.share(_get_targets(mcsamples_in))
    else:
        target_mean, target_cov = mpi.share()

    for mem in [False, True]:
        post(info_post, sample=sampler.products()["sample"] if mem else None)
        # Load with GetDist and compare
        if mpi.is_main_process():
            mcsamples = loadMCSamples(
                info_post["output"] + _post_ + info_post["post"]["suffix"])
            new_mean = mcsamples.mean(["a", "b"])
            new_cov = mcsamples.getCovMat().matrix
            mpi.share((new_mean, new_cov))
        else:
            new_mean, new_cov = mpi.share()
        assert np.allclose(new_mean, target_mean)
        assert np.allclose(new_cov, target_cov)
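
The final assertions compare weighted moments of the post-processed chain with the target. Below is a minimal standalone sketch of the underlying idea (importance reweighting by the ratio of new to old prior densities); it is not Cobaya's `post` implementation, and all names and numbers in it are illustrative.

import numpy as np

def reweight(weights, logp_old, logp_new):
    # Multiply the chain weights by exp(logp_new - logp_old), shifted for numerical stability.
    delta = logp_new - logp_old
    new_weights = weights * np.exp(delta - delta.max())
    return new_weights / new_weights.sum()

rng = np.random.default_rng(0)
x = rng.normal(0.0, 1.0, size=100_000)   # chain drawn under the old prior N(0, 1)
logp_old = -0.5 * x**2                   # old log-prior, up to a constant
logp_new = -0.5 * (x - 0.2)**2           # new target prior N(0.2, 1), up to a constant
w = reweight(np.ones_like(x), logp_old, logp_new)
print(np.average(x, weights=w))          # close to 0.2 for a long enough chain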
Example #2
 def check_convergence_and_learn_proposal(self):
     """
     Checks the convergence of the sampling process, and, if requested,
     learns a new covariance matrix for the proposal distribution from the covariance
     of the last samples.
     """
     # Compute Rminus1 of means
     self.been_waiting = 0
     if more_than_one_process():
         # Compute and gather means and covs
         use_first = int(self.n() / 2)
         mean = self.collection.mean(first=use_first)
         cov = self.collection.cov(first=use_first)
         acceptance_rate = self.get_acceptance_rate(use_first)
         Ns, means, covs, acceptance_rates = mpi.array_gather(
             [self.n(), mean, cov, acceptance_rate])
     else:
         # Compute and gather means, covs and CL intervals of last m-1 chain fractions
         m = 1 + self.Rminus1_single_split
         cut = int(len(self.collection) / m)
         try:
             acceptance_rate = self.get_acceptance_rate(cut)
             Ns = np.ones(m - 1) * cut
             means = np.array([
                 self.collection.mean(first=i * cut, last=(i + 1) * cut - 1)
                 for i in range(1, m)
             ])
             covs = np.array([
                 self.collection.cov(first=i * cut, last=(i + 1) * cut - 1)
                 for i in range(1, m)
             ])
         except always_stop_exceptions:
             raise
         except Exception:
             self.log.info(
                 "Not enough points in chain to check convergence. "
                 "Waiting for next checkpoint.")
             return
         acceptance_rates = None
     if is_main_process():
         self.progress.at[self.i_learn, "N"] = sum(Ns)
         self.progress.at[self.i_learn, "timestamp"] = \
             datetime.datetime.now().isoformat()
         acceptance_rate = (np.average(acceptance_rates, weights=Ns)
                            if acceptance_rates is not None else
                            acceptance_rate)
         self.log.info(
             " - Acceptance rate: %.3f" +
             (" = avg(%r)" % list(acceptance_rates)
              if acceptance_rates is not None else ""), acceptance_rate)
         self.progress.at[self.i_learn, "acceptance_rate"] = acceptance_rate
         # "Within" or "W" term -- our "units" for assessing convergence
         # and our prospective new covariance matrix
         mean_of_covs = np.average(covs, weights=Ns, axis=0)
         # "Between" or "B" term
         # We don't weight with the number of samples in the chains here:
         # shorter chains will likely be outliers, and we want to notice them
         cov_of_means = np.atleast_2d(np.cov(means.T))  # , fweights=Ns)
         # For numerical stability, we turn mean_of_covs into correlation matrix:
         #   rho = (diag(Sigma))^(-1/2) * Sigma * (diag(Sigma))^(-1/2)
         # and apply the same transformation to the mean of covs (same eigenvals!)
         d = np.sqrt(np.diag(cov_of_means))
         corr_of_means = (cov_of_means / d).T / d
         norm_mean_of_covs = (mean_of_covs / d).T / d
         success_means = False
         converged_means = False
         # Cholesky of (normalized) mean of covs and eigvals of Linv*cov_of_means*L
         try:
             L = np.linalg.cholesky(norm_mean_of_covs)
         except np.linalg.LinAlgError:
             self.log.warning(
                 "Negative covariance eigenvectors. "
                 "This may mean that the covariance of the samples does not "
                 "contain enough information at this point. "
                 "Skipping learning a new covmat for now.")
         else:
             Linv = np.linalg.inv(L)
             try:
                 eigvals = np.linalg.eigvalsh(
                     Linv.dot(corr_of_means).dot(Linv.T))
                 success_means = True
             except np.linalg.LinAlgError:
                 self.log.warning("Could not compute eigenvalues. "
                                  "Skipping learning a new covmat for now.")
             else:
                 Rminus1 = max(np.abs(eigvals))
                 self.progress.at[self.i_learn, "Rminus1"] = Rminus1
                 # For real square matrices, a possible def of the cond number is:
                 condition_number = Rminus1 / min(np.abs(eigvals))
                 self.log.debug(" - Condition number = %g",
                                condition_number)
                 self.log.debug(" - Eigenvalues = %r", eigvals)
                 self.log.info(
                     " - Convergence of means: R-1 = %f after %d accepted steps"
                     % (Rminus1, sum(Ns)) +
                     (" = sum(%r)" %
                      list(Ns) if more_than_one_process() else ""))
                 # Have we converged in means?
                 # (criterion must be fulfilled twice in a row)
                 converged_means = max(
                     Rminus1, self.Rminus1_last) < self.Rminus1_stop
     else:
         mean_of_covs = None
         success_means = None
         converged_means = False
         Rminus1 = None
     success_means, converged_means = mpi.share(
         (success_means, converged_means))
     # Check the convergence of the bounds of the confidence intervals
     # Same as R-1, but with the rms deviation from the mean bound
     # in units of the mean standard deviation of the chains
     if converged_means:
         if more_than_one_process():
             mcsamples = self.collection.sampled_to_getdist_mcsamples(
                 first=use_first)
             try:
                 bound = np.array([[
                     mcsamples.confidence(i,
                                          limfrac=self.Rminus1_cl_level /
                                          2.,
                                          upper=which)
                     for i in range(self.model.prior.d())
                 ] for which in [False, True]]).T
                 success_bounds = True
             except Exception:
                 bound = None
                 success_bounds = False
             bounds = np.array(mpi.gather(bound))
         else:
             try:
                 mcsamples_list = [
                     self.collection.sampled_to_getdist_mcsamples(
                         first=i * cut, last=(i + 1) * cut - 1)
                     for i in range(1, m)
                 ]
             except always_stop_exceptions:
                 raise
             except Exception:
                 self.log.info(
                     "Not enough points in chain to check c.l. convergence. "
                     "Waiting for next checkpoint.")
                 return
             try:
                 bounds = [
                     np.array([[
                         mcs.confidence(i,
                                        limfrac=self.Rminus1_cl_level / 2.,
                                        upper=which)
                         for i in range(self.model.prior.d())
                     ] for which in [False, True]]).T
                     for mcs in mcsamples_list
                 ]
                 success_bounds = True
             except Exception:
                 bounds = None
                 success_bounds = False
         if is_main_process():
             if success_bounds:
                 Rminus1_cl = (np.std(bounds, axis=0).T /
                               np.sqrt(np.diag(mean_of_covs)))
                 self.log.debug(" - normalized std's of bounds = %r",
                                Rminus1_cl)
                 Rminus1_cl = np.max(Rminus1_cl)
                 self.progress.at[self.i_learn, "Rminus1_cl"] = Rminus1_cl
                 self.log.info(
                     " - Convergence of bounds: R-1 = %f after %d " %
                     (Rminus1_cl,
                      (sum(Ns) if more_than_one_process() else self.n())) +
                     "accepted steps" +
                     (" = sum(%r)" %
                      list(Ns) if more_than_one_process() else ""))
                 if Rminus1_cl < self.Rminus1_cl_stop:
                     self.converged = True
                     self.log.info("The run has converged!")
                     self._Ns = Ns
             else:
                 self.log.info(
                     "Computation of the bounds was not possible. "
                     "Waiting until the next converge check.")
     # Broadcast and save the convergence status and the last R-1 of means
     if success_means:
         self.Rminus1_last, self.converged = mpi.share((
             Rminus1, self.converged) if is_main_process() else None)
         # Do we want to learn a better proposal pdf?
         if self.learn_proposal and not self.converged:
             good_Rminus1 = (self.learn_proposal_Rminus1_max >
                             self.Rminus1_last >
                             self.learn_proposal_Rminus1_min)
             if not good_Rminus1:
                 self.mpi_info(
                     "Convergence less than requested for updates: "
                     "waiting until the next convergence check.")
                 return
             mean_of_covs = mpi.share(mean_of_covs)
             try:
                 self.proposer.set_covariance(mean_of_covs)
                 self.mpi_info(
                     " - Updated covariance matrix of proposal pdf.")
                 self.mpi_debug("%r", mean_of_covs)
             except Exception:
                 self.mpi_debug(
                     "Updating covariance matrix failed unexpectedly. "
                     "Waiting until next covmat learning attempt.")
     # Save checkpoint info
     self.write_checkpoint()
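
For reference, here is a condensed standalone sketch of the "R-1 of means" statistic this method computes: the largest eigenvalue of the between-fragment correlation of means, in the basis whitened by the mean within-fragment covariance. The array shapes and the helper name are assumptions for illustration, not Cobaya API.

import numpy as np

def r_minus_one(means, covs, weights=None):
    # "Within" term: mean of the per-fragment covariances.
    mean_of_covs = np.average(covs, weights=weights, axis=0)
    # "Between" term: covariance of the per-fragment means.
    cov_of_means = np.atleast_2d(np.cov(means.T))
    # Normalize both by the std's of the means (same transformation, same eigenvalues).
    d = np.sqrt(np.diag(cov_of_means))
    corr_of_means = (cov_of_means / d).T / d
    norm_mean_of_covs = (mean_of_covs / d).T / d
    # Whiten with the Cholesky factor of the within term and take the largest eigenvalue.
    L = np.linalg.cholesky(norm_mean_of_covs)   # raises LinAlgError if not positive definite
    Linv = np.linalg.inv(L)
    eigvals = np.linalg.eigvalsh(Linv.dot(corr_of_means).dot(Linv.T))
    return np.abs(eigvals).max()

rng = np.random.default_rng(1)
means = 0.01 * rng.normal(size=(4, 3))   # means of four chain fragments, three parameters
covs = np.stack([np.eye(3)] * 4)         # identical unit within-fragment covariances
print(r_minus_one(means, covs))          # small value: the fragment means agree well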
Example #3
def test_post_likelihood():
    """
    Swaps likelihood "gaussian" for "target".

    It also tests aggregated chi2's by removing a likelihood of an existing type
    and adding another of the same type.
    """
    # Generate original chain
    orig_interval = OutputOptions.output_inteveral_s
    try:
        OutputOptions.output_inteveral_s = 0
        info_params_local = deepcopy(info_params)
        info_params_local["dummy"] = 0
        dummy_loglike_add = 0.1
        dummy_loglike_remove = 0.01
        info = {
            "output": None,
            "force": True,
            "params": info_params_local,
            "sampler": info_sampler,
            "likelihood": {
                "gaussian": {
                    "external": sampled_pdf,
                    "type": "A"
                },
                "dummy": {
                    "external": lambda dummy: 1,
                    "type": "BB"
                },
                "dummy_remove": {
                    "external": lambda dummy: dummy_loglike_add,
                    "type": "BB"
                }
            }
        }
        info_out, sampler = run(info)
        samples_in = mpi.gather(sampler.products()["sample"])
        if mpi.is_main_process():
            mcsamples_in = MCSamplesFromCobaya(info_out, samples_in)
        else:
            mcsamples_in = None

        info_out.update({
            "post": {
                "suffix": "foo",
                "remove": {
                    "likelihood": {
                        "gaussian": None,
                        "dummy_remove": None
                    }
                },
                "add": {
                    "likelihood": {
                        "target": {
                            "external": target_pdf,
                            "type": "A",
                            "output_params": ["cprime"]
                        },
                        "dummy_add": {
                            "external": lambda dummy: dummy_loglike_remove,
                            "type": "BB"
                        }
                    }
                }
            }
        })
        info_post_out, products_post = post(info_out,
                                            sampler.products()["sample"])
        samples = mpi.gather(products_post["sample"])

        # Load with GetDist and compare
        if mcsamples_in:
            target_mean, target_cov = mpi.share(_get_targets(mcsamples_in))

            mcsamples = MCSamplesFromCobaya(info_post_out,
                                            samples,
                                            name_tag="sample")
            new_mean = mcsamples.mean(["a", "b"])
            new_cov = mcsamples.getCovMat().matrix
            mpi.share((new_mean, new_cov))
        else:
            target_mean, target_cov = mpi.share()
            new_mean, new_cov = mpi.share()
        assert np.allclose(new_mean, target_mean)
        assert np.allclose(new_cov, target_cov)
        assert allclose(products_post["sample"]["chi2__A"],
                        products_post["sample"]["chi2__target"])
        assert allclose(
            products_post["sample"]["chi2__BB"],
            products_post["sample"]["chi2__dummy"] +
            products_post["sample"]["chi2__dummy_add"])
    finally:
        OutputOptions.output_inteveral_s = orig_interval
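
The last two assertions rely on per-type aggregated chi2 columns (chi2__A, chi2__BB). A minimal sketch of that bookkeeping follows, with made-up values rather than the columns Cobaya actually produces.

import numpy as np

chi2 = {  # made-up per-likelihood chi2 columns for a three-point chain
    "target": np.array([1.0, 2.0, 3.0]),
    "dummy": np.array([0.1, 0.1, 0.1]),
    "dummy_add": np.array([0.2, 0.2, 0.2]),
}
types = {"target": "A", "dummy": "BB", "dummy_add": "BB"}

aggregated = {}
for name, values in chi2.items():
    aggregated.setdefault(types[name], np.zeros_like(values))
    aggregated[types[name]] += values

assert np.allclose(aggregated["A"], chi2["target"])
assert np.allclose(aggregated["BB"], chi2["dummy"] + chi2["dummy_add"])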
Example #4
def body_of_test(dim, tmpdir=None, random_state=None):
    mindim = 4
    assert dim > mindim, "Needs dimension>%d for the test." % mindim
    if mpi.is_main_process():
        random_state = np.random.default_rng(random_state)
        i_s = list(range(dim))
        random_state.shuffle(i_s)
        initial_random_covmat = random_cov(dim * [[0, 1]],
                                           random_state=random_state)
        mpi.share((i_s, initial_random_covmat))
    else:
        i_s, initial_random_covmat = mpi.share()

    n_altered = int(dim / 4)
    i_proposal = i_s[:n_altered]
    i_ref = i_s[n_altered:2 * n_altered]
    i_prior = i_s[2 * n_altered:3 * n_altered]
    removed = list(chain(*(i_proposal, i_ref, i_prior)))
    i_covmat = [i for i in range(dim) if i not in removed]
    for i in removed:
        diag = initial_random_covmat[i, i]
        initial_random_covmat[:, i] = 0
        initial_random_covmat[i, :] = 0
        initial_random_covmat[i, i] = diag
    # Prepare info, including refs, priors and reduced covmat
    prefix = "a_"
    if mpi.is_main_process():
        input_order = list(range(dim))
        random_state.shuffle(input_order)
    else:
        input_order = None
    input_order = mpi.share(input_order)
    info: InputDict = {"likelihood": {"one": None}, "params": {}}
    for i in input_order:
        p = prefix + str(i)
        info["params"][p] = {
            "prior": {
                "dist": "norm",
                "loc": 0,
                "scale": 1000
            }
        }
        sigma = np.sqrt(initial_random_covmat[i, i])
        if i in i_proposal:
            info["params"][p]["proposal"] = sigma
        elif i in i_ref:
            info["params"][prefix + str(i)]["ref"] = {
                "dist": "norm",
                "scale": sigma
            }
        elif i in i_prior:
            info["params"][prefix + str(i)]["prior"]["scale"] = sigma
    reduced_covmat = initial_random_covmat[np.ix_(i_covmat, i_covmat)]
    reduced_covmat_params = [prefix + str(i) for i in i_covmat]
    info["sampler"] = {"mcmc": {}}
    if tmpdir:
        filename = os.path.join(str(tmpdir), "mycovmat.dat")
        header = " ".join(reduced_covmat_params)
        np.savetxt(filename, reduced_covmat, header=header)
        info["sampler"]["mcmc"]["covmat"] = str(filename)
    else:
        info["sampler"]["mcmc"]["covmat_params"] = reduced_covmat_params
        info["sampler"]["mcmc"]["covmat"] = reduced_covmat
    to_compare = initial_random_covmat[np.ix_(input_order, input_order)]

    def callback(sampler):
        assert np.allclose(to_compare, sampler.proposer.get_covariance())

    info["sampler"]["mcmc"].update({
        "callback_function": callback,
        "callback_every": 1,
        "max_samples": 1,
        "burn_in": 0
    })
    run(info)
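
The callback comparison relies on np.ix_(input_order, input_order) permuting rows and columns of the covariance consistently with the shuffled parameter order. A tiny illustration with arbitrary values:

import numpy as np

cov = np.array([[4.0, 1.0, 0.0],
                [1.0, 9.0, 2.0],
                [0.0, 2.0, 16.0]])
order = [2, 0, 1]                        # a new parameter ordering
reordered = cov[np.ix_(order, order)]    # permutes rows and columns together
assert reordered[0, 0] == cov[2, 2]      # the variance of parameter 2 moves to slot 0
assert reordered[0, 2] == cov[2, 1]      # the (2, 1) covariance moves to slot (0, 2)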
Example #5
def body_of_test(info_logpdf, kind, tmpdir, derived=False, manual=False):
    rand = mpi.share(random())
    prefix = os.path.join(tmpdir, "%d" % round(1e8 * rand)) + os.sep
    if mpi.is_main_process():
        if os.path.exists(prefix):
            shutil.rmtree(prefix)
    # build updated info
    info = {
        "output": prefix,
        "params": {
            "x": {
                "prior": {
                    "min": 0,
                    "max": 1
                },
                "proposal": 0.05
            },
            "y": {
                "prior": {
                    "min": -1,
                    "max": 1
                },
                "proposal": 0.05
            }
        },
        "sampler": {
            "mcmc": {
                "max_samples": (10 if not manual else 5000),
                "learn_proposal": False
            }
        }
    }
    if derived:
        info["params"].update({
            "r": {
                "min": 0,
                "max": 1
            },
            "theta": {
                "min": -0.5,
                "max": 0.5
            }
        })
    # Complete according to kind
    if kind == "prior":
        info.update({"prior": info_logpdf, "likelihood": {"one": None}})
    elif kind == "likelihood":
        info.update({"likelihood": info_logpdf})
    else:
        raise ValueError("Kind of test not known.")
    # If any external function is not given as a string, don't write output!
    stringy = {k: v for k, v in info_logpdf.items() if isinstance(v, str)}
    if stringy != info_logpdf:
        info.pop("output")
    # Run
    updated_info, sampler = run(info)
    products = sampler.products()
    # Test values
    logprior_base = -np.log((info["params"]["x"]["prior"]["max"] -
                             info["params"]["x"]["prior"]["min"]) *
                            (info["params"]["y"]["prior"]["max"] -
                             info["params"]["y"]["prior"]["min"]))
    logps = {
        name: logpdf(
            **{
                arg: products["sample"][arg].values
                for arg in getfullargspec(logpdf)[0]
            })
        for name, logpdf in {
            "half_ring": half_ring_func,
            "gaussian_y": gaussian_func
        }.items()
    }
    # Test #1: values of logpdfs
    if kind == "prior":
        columns_priors = [
            c for c in products["sample"].data.columns
            if c.startswith("minuslogprior")
        ]
        assert np.allclose(
            products["sample"][columns_priors[0]].values,
            np.sum(products["sample"][columns_priors[1:]].values, axis=-1)), (
                "The single prior values do not add up to the total one.")
        assert np.allclose(
            logprior_base + sum(logps[p] for p in info_logpdf),
            -products["sample"]["minuslogprior"].values), (
                "The value of the total prior is not reproduced correctly.")
        assert np.isclose(
            sampler.model.logprior({
                'x': products["sample"]["x"][0],
                'y': products["sample"]["y"][0]
            }), -products["sample"]["minuslogprior"][0]
        ), ("The value of the total prior is not reproduced from mode.logprior."
            )
    elif kind == "likelihood":
        for lik in info["likelihood"]:
            assert np.allclose(
                -2 * logps[lik], products["sample"][get_chi2_name(lik)].values
            ), ("The value of the likelihood '%s' is not reproduced correctly."
                % lik)
    assert np.allclose(
        logprior_base + sum(logps[p] for p in info_logpdf),
        -products["sample"]["minuslogpost"].values), (
            "The value of the posterior is not reproduced correctly.")
    # Test derived parameters, if present -- for now just for "r"
    if derived:
        derived_values = {
            param:
            func(**{arg: products["sample"][arg].values
                    for arg in ["x", "y"]})
            for param, func in derived_funcs.items()
        }
        assert all(
            np.allclose(v, products["sample"][p].values)
            for p, v in derived_values.items()
        ), ("The value of the derived parameters is not reproduced correctly.")
    # Test updated info -- scripted
    if kind == "prior":
        assert info["prior"] == updated_info["prior"], (
            "The prior information has not been updated correctly.")
    elif kind == "likelihood":
        # Transform the likelihood info to the "external" convention and add defaults
        info_likelihood = deepcopy(info["likelihood"])
        for lik, value in list(info_likelihood.items()):
            if not hasattr(value, "get"):
                info_likelihood[lik] = {"external": value}
            info_likelihood[lik].update({
                k: v
                for k, v in Likelihood.get_defaults().items()
                if k not in info_likelihood[lik]
            })
            for k in ["input_params", "output_params"]:
                info_likelihood[lik].pop(k, None)
                updated_info["likelihood"][lik].pop(k)
        assert info_likelihood == updated_info["likelihood"], (
            "The likelihood information has not been updated correctly\n %r vs %r"
            % (info_likelihood, updated_info["likelihood"]))
    # Test updated info -- yaml
    # For now, only if ALL external pdfs are given as strings,
    # since the YAML load fails otherwise
    if stringy == info_logpdf:
        updated_output_file = os.path.join(prefix,
                                           FileSuffix.updated + ".yaml")
        with open(updated_output_file) as updated:
            updated_yaml = yaml_load("".join(updated.readlines()))
        for k, v in stringy.items():
            to_test = updated_yaml[kind][k]
            if kind == "likelihood":
                to_test = to_test["external"]
            assert to_test == info_logpdf[k], (
                "The updated external pdf info has not been written correctly."
            )
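
Test #1 builds logprior_base as minus the log of the box volume of the separable uniform priors on x and y. A small numerical check of that identity using scipy.stats, shown only as an illustration, not part of the test:

import numpy as np
from scipy import stats

x_prior = stats.uniform(loc=0, scale=1)    # x ~ U(0, 1)
y_prior = stats.uniform(loc=-1, scale=2)   # y ~ U(-1, 1)
logprior_base = -np.log((1 - 0) * (1 - (-1)))
assert np.isclose(x_prior.logpdf(0.3) + y_prior.logpdf(-0.2), logprior_base)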