def test_parametrization():
    updated_info, products = run(info)
    sample = products["sample"]
    for i, point in sample:
        a = info[_params]["a"]
        b = get_external_function(info[_params]["b"])(a, point["bprime"])
        d = get_external_function(d_func)(a)
        e = get_external_function(e_func)(b)
        f = get_external_function(info[_params]["f"]["derived"])(x_func(point["c"]))
        assert np.allclose(
            point[[_derived_pre + p for p in ["d", "e", "f"]]], [d, e, f])
def test_parameterization():
    updated_info, sampler = run(info)
    products = sampler.products()
    sample = products["sample"]
    from getdist.mcsamples import MCSamplesFromCobaya
    gdsample = MCSamplesFromCobaya(updated_info, products["sample"])
    for i, point in sample:
        a = info[_params]["a"]
        b = get_external_function(info[_params]["b"])(a, point["bprime"])
        c = get_external_function(info[_params]["c"])(a, point["cprime"])
        e = get_external_function(e_func)(b)
        f = get_external_function(f_func)(b)
        g = get_external_function(info[_params]["g"]["derived"])(x_func(point["c"]))
        h = get_external_function(info[_params]["h"])(info[_params]["i"])
        j = get_external_function(info[_params]["j"])(b)
        k = get_external_function(info[_params]["k"]["derived"])(f)
        assert np.allclose(point[["b", "c", "e", "f", "g", "h", "j", "k"]],
                           [b, c, e, f, g, h, j, k])
        # Test for GetDist too (except fixed ones, ignored by GetDist)
        bcefgjk_getdist = [gdsample.samples[i][gdsample.paramNames.list().index(p)]
                           for p in ["b", "c", "e", "f", "g", "j", "k"]]
        assert np.allclose(bcefgjk_getdist, [b, c, e, f, g, j, k])
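# Hedged sketch (NOT the actual `info` used by the tests above, which is defined
# elsewhere in the test module): a minimal Cobaya-style input dict illustrating the
# dynamical-parameter syntax these tests exercise -- a fixed parameter given as a
# function of others ("b"), a sampled helper dropped from the likelihood input
# ("bprime"), and a dynamically derived parameter ("e"). Names and the toy
# likelihood are assumptions for illustration only.
example_info = {
    "likelihood": {"toy": "lambda b: -0.5 * b ** 2"},
    "params": {
        "a": 0.1,                                  # constant input parameter
        "bprime": {"prior": {"min": 0, "max": 1},  # sampled helper parameter
                   "drop": True},                  # not passed to the likelihood
        "b": "lambda a, bprime: a * bprime",       # input param computed from others
        "e": {"derived": "lambda b: b ** 2"},      # dynamically derived parameter
    },
    "sampler": {"mcmc": {"max_samples": 10}},
}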
def __init__(self, name, info, parameterization, _theory=None, timing=None):
    self.name = name
    self.log = logging.getLogger(self.name)
    # Load info of the likelihood
    for k in info:
        setattr(self, k, info[k])
    # Store the external function and its arguments
    self.external_function = get_external_function(info[_external], name=self.name)
    argspec = getargspec(self.external_function)
    self.input_params = odict(
        [(p, None) for p in argspec.args
         if p not in ["_derived", "_theory"]
         and p in parameterization.input_params()])
    self.has_derived = "_derived" in argspec.args
    if self.has_derived:
        derived_kw_index = argspec.args[-len(argspec.defaults):].index("_derived")
        self.output_params = argspec.defaults[derived_kw_index]
    else:
        self.output_params = []
    self.has_theory = "_theory" in argspec.args
    if self.has_theory:
        theory_kw_index = argspec.args[-len(argspec.defaults):].index("_theory")
        self.needs = argspec.defaults[theory_kw_index]
    # Timing
    self.timing = timing
    self.n = 0
    self.time_avg = 0
    # States, to avoid recomputing
    self.n_states = 3
    self.states = [{"params": None, "logp": None, "derived": None, "last": 0}
                   for _ in range(self.n_states)]
def __init__(self, info, name, timing=None): Theory.__init__(self, info, name=name, timing=timing, standalone=False) # Store the external function and assign its arguments self.external_function = get_external_function(info[_external], name=name) self._self_arg = "_self" argspec = getfullargspec(self.external_function) if info.get(_input_params, []): setattr(self, _input_params, str_to_list(info.get(_input_params))) else: ignore_args = [self._self_arg] # MARKED FOR DEPRECATION IN v3.0 ignore_args += ["_derived", "_theory"] # END OF DEPRECATION BLOCK setattr(self, _input_params, [p for p in argspec.args if p not in ignore_args]) # MARKED FOR DEPRECATION IN v3.0 self._derived_through_arg = "_derived" in argspec.args # END OF DEPRECATION BLOCK if info.get(_output_params, []): setattr(self, _output_params, str_to_list(info.get(_output_params))) # MARKED FOR DEPRECATION IN v3.0 elif self._derived_through_arg: self.log.warning( "The use of a `_derived` argument to deal with derived parameters will be" " deprecated in a future version. From now on please list your derived " "parameters in a list as the value of %r in the likelihood info (see " "documentation) and have your function return a tuple " "`(logp, {derived_param_1: value_1, ...})`.", _output_params) # BEHAVIOUR TO BE REPLACED BY ERROR: derived_kw_index = argspec.args[-len(argspec.defaults):].index( "_derived") setattr(self, _output_params, argspec.defaults[derived_kw_index]) # END OF DEPRECATION BLOCK else: setattr(self, _output_params, []) # Required quantities from other components self._uses_self_arg = self._self_arg in argspec.args if info.get(_requires) and not self._uses_self_arg: raise LoggedError( self.log, "If a likelihood has external requirements, declared under %r, " "it needs to accept a keyword argument %r.", _requires, self._self_arg) # MARKED FOR DEPRECATION IN v3.0 self._uses_old_theory = "_theory" in argspec.args if self._uses_old_theory: self.log.warning( "The use of a `_theory` argument to deal with requirements will be" " deprecated in a future version. From now on please indicate your " "requirements as the value of field %r in the likelihood info (see " "documentation) and have your function take a parameter `_self`.", _requires) # BEHAVIOUR TO BE REPLACED BY ERROR: info[_requires] = argspec.defaults[ argspec.args[-len(argspec.defaults):].index("_theory")] # END OF DEPRECATION BLOCK self._requirements = info.get(_requires, {}) or {} self.log.info("Initialized external likelihood.")
def __init__(self, info, name, timing=None): Theory.__init__(self, info, name=name, timing=timing, standalone=False) # Store the external function and assign its arguments self.external_function = get_external_function(info["external"], name=name) self._self_arg = "_self" argspec = getfullargspec(self.external_function) self.input_params = str_to_list(self.input_params) ignore_args = [self._self_arg] if argspec.defaults: required_args = argspec.args[:-len(argspec.defaults)] else: required_args = argspec.args self.params = {p: None for p in required_args if p not in ignore_args} # MARKED FOR DEPRECATION IN v3.0 if "_derived" in argspec.args: raise LoggedError( self.log, "The use of a `_derived` argument to deal with derived " "parameters has been deprecated. From now on please list your " "derived parameters in a list as the value of %r in the " "likelihood info (see documentation) and have your function " "return a tuple `(logp, {derived_param_1: value_1, ...})`.", "output_params") # END OF DEPRECATION BLOCK if self.output_params: self.output_params = str_to_list(self.output_params) or [] # Required quantities from other components self._uses_self_arg = self._self_arg in argspec.args if info.get("requires") and not self._uses_self_arg: raise LoggedError( self.log, "If a likelihood has external requirements, declared under %r, " "it needs to accept a keyword argument %r.", "requires", self._self_arg) self._requirements = info.get("requires") or {} # MARKED FOR DEPRECATION IN v3.0 if "_theory" in argspec.args: raise LoggedError( self.log, "The use of a `_theory` argument to deal with requirements has " "been deprecated. From now on please indicate your requirements" " as the value of field %r in the likelihood info (see " "documentation) and have your function take a parameter " "`_self`.", "requires") # END OF DEPRECATION BLOCK self._optional_args = \ [p for p, val in chain(zip(argspec.args[-len(argspec.defaults):], argspec.defaults) if argspec.defaults else [], (argspec.kwonlydefaults or {}).items()) if p not in ignore_args and (isinstance(val, numbers.Number) or val is None)] self._args = set(chain(self._optional_args, self.params)) if argspec.varkw: self._args.update(self.input_params) self.log.info("Initialized external likelihood.")
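# Hedged illustration of the calling convention the constructor above expects (all
# names here are made up): positional arguments without defaults become the
# likelihood's input parameters, the function returns `(logp, {derived: value})`
# when `output_params` is declared in its info block, and a `_self` argument gives
# access to requirements declared under `requires`. This is a sketch based on the
# parsing code above, not a definitive API reference.
def my_external_like(x, y, amplitude=1.0, _self=None):
    # `_self.provider` would expose quantities requested under `requires`
    logp = -0.5 * amplitude * (x ** 2 + y ** 2)
    return logp, {"r2": x ** 2 + y ** 2}

# In a full input dict, "x" and "y" would also need entries under "params".
info_like = {"likelihood": {"my_like": {
    "external": my_external_like,
    "output_params": ["r2"],
}}}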
def test_parameterization():
    updated_info, products = run(info)
    sample = products["sample"]
    from getdist.mcsamples import loadCobayaSamples
    gdsample = loadCobayaSamples(updated_info, products["sample"])
    for i, point in sample:
        a = info[_params]["a"]
        b = get_external_function(info[_params]["b"])(a, point["bprime"])
        c = get_external_function(info[_params]["c"])(a, point["cprime"])
        e = get_external_function(e_func)(b)
        f = get_external_function(f_func)(b)
        g = get_external_function(info[_params]["g"]["derived"])(x_func(point["c"]))
        assert np.allclose(point[["b", "c", "e", "f", "g"]], [b, c, e, f, g])
        bcefg_getdist = [gdsample.samples[i][gdsample.paramNames.list().index(p)]
                         for p in ["b", "c", "e", "f", "g"]]
        assert np.allclose(bcefg_getdist, [b, c, e, f, g])
def __init__(self, name, info, parametrization, theory=None):
    self.name = name
    # Load info of the likelihood
    for k in info:
        setattr(self, k, info[k])
    # Store the external function and its arguments
    self.external_function = get_external_function(info[_external], name=self.name)
    argspec = inspect.getargspec(self.external_function)
    self.input_params = odict([(p, None) for p in argspec.args
                               if p not in ["derived", "theory"]
                               and p in parametrization.input_params()])
    self.has_derived = "derived" in argspec.args
    if self.has_derived:
        derived_kw_index = argspec.args[-len(argspec.defaults):].index("derived")
        self.output_params = argspec.defaults[derived_kw_index]
    else:
        self.output_params = []
    self.has_theory = "theory" in argspec.args
    if self.has_theory:
        theory_kw_index = argspec.args[-len(argspec.defaults):].index("theory")
        self.needs = argspec.defaults[theory_kw_index]
def initialize(self): """Initializes the sampler: creates the proposal distribution and draws the initial sample.""" self.log.debug("Initializing") for p in [ "burn_in", "max_tries", "output_every", "check_every", "callback_every" ]: setattr( self, p, read_dnumber(getattr(self, p), self.model.prior.d(), dtype=int)) if self.callback_every is None: self.callback_every = self.check_every # Burning-in countdown -- the +1 accounts for the initial point (always accepted) self.burn_in_left = self.burn_in + 1 # Max # checkpoints to wait, in case one process dies without sending MPI_ABORT self.been_waiting = 0 self.max_waiting = max(50, self.max_tries / self.model.prior.d()) if self.resuming and (max(self.mpi_size or 0, 1) != max( get_mpi_size(), 1)): self.log.error( "Cannot resume a sample with a different number of chains: " "was %d and now is %d.", max(self.mpi_size, 1), max(get_mpi_size(), 1)) raise HandledException if not self.resuming and self.output: # Delete previous files (if not "forced", the run would have already failed) if ((os.path.abspath(self.covmat_filename()) != os.path.abspath( str(self.covmat)))): try: os.remove(self.covmat_filename()) except OSError: pass # There may be more that chains than expected, # if #ranks was bigger in a previous run i = 0 while True: i += 1 collection_filename, _ = self.output.prepare_collection(str(i)) try: os.remove(collection_filename) except OSError: break # One collection per MPI process: `name` is the MPI rank + 1 name = str(1 + (lambda r: r if r is not None else 0)(get_mpi_rank())) self.collection = Collection(self.model, self.output, name=name, resuming=self.resuming) self.current_point = OnePoint(self.model, OutputDummy({}), name=name) # Use standard MH steps by default self.get_new_sample = self.get_new_sample_metropolis # Prepare oversampling / dragging if applicable self.effective_max_samples = self.max_samples if self.oversample and self.drag: self.log.error("Choose either oversampling or dragging, not both.") raise HandledException if self.oversample: factors, blocks = self.model.likelihood._speeds_of_params( int_speeds=True) self.oversampling_factors = factors self.log.info("Oversampling with factors:\n" + "\n".join([ " %d : %r" % (f, b) for f, b in zip(self.oversampling_factors, blocks) ])) self.i_last_slow_block = None # No way right now to separate slow and fast slow_params = list(self.model.parameterization.sampled_params()) elif self.drag: speeds, blocks = self.model.likelihood._speeds_of_params( fast_slow=True, int_speeds=True) # For now, no blocking inside either fast or slow: just 2 blocks self.i_last_slow_block = 0 if np.all(speeds == speeds[0]): self.log.error( "All speeds are equal or too similar: cannot drag! 
" "Make sure to define accurate likelihoods' speeds.") raise HandledException # Make the 1st factor 1: speeds = [1, speeds[1] / speeds[0]] # Target: dragging step taking as long as slow step self.drag_interp_steps = self.drag * speeds[1] # Per dragging step, the (fast) posterior is evaluated *twice*, self.drag_interp_steps /= 2 self.drag_interp_steps = int(np.round(self.drag_interp_steps)) fast_params = list(chain(*blocks[1 + self.i_last_slow_block:])) # Not too much or too little dragging drag_limits = [(int(l) * len(fast_params) if l is not None else l) for l in self.drag_limits] if drag_limits[ 0] is not None and self.drag_interp_steps < drag_limits[0]: self.log.warning( "Number of dragging steps clipped from below: was not " "enough to efficiently explore the fast directions -- " "avoid this limit by decreasing 'drag_limits[0]'.") self.drag_interp_steps = drag_limits[0] if drag_limits[ 1] is not None and self.drag_interp_steps > drag_limits[1]: self.log.warning( "Number of dragging steps clipped from above: " "excessive, probably inefficient, exploration of the " "fast directions -- " "avoid this limit by increasing 'drag_limits[1]'.") self.drag_interp_steps = drag_limits[1] # Re-scale steps between checkpoint and callback to the slow dimensions only slow_params = list(chain(*blocks[:1 + self.i_last_slow_block])) self.n_slow = len(slow_params) for p in ["check_every", "callback_every"]: setattr( self, p, int(getattr(self, p) * self.n_slow / self.model.prior.d())) self.log.info("Dragging with oversampling per step:\n" + "\n".join([ " %d : %r" % (f, b) for f, b in zip([1, self.drag_interp_steps], [blocks[0], fast_params]) ])) self.get_new_sample = self.get_new_sample_dragging else: _, blocks = self.model.likelihood._speeds_of_params() self.oversampling_factors = [1 for b in blocks] slow_params = list(self.model.parameterization.sampled_params()) self.n_slow = len(slow_params) # Turn parameter names into indices self.blocks = [[ list(self.model.parameterization.sampled_params()).index(p) for p in b ] for b in blocks] self.proposer = BlockedProposer( self.blocks, oversampling_factors=self.oversampling_factors, i_last_slow_block=self.i_last_slow_block, proposal_scale=self.proposal_scale) # Build the initial covariance matrix of the proposal, or load from checkpoint if self.resuming: covmat = np.loadtxt(self.covmat_filename()) self.log.info("Covariance matrix from checkpoint.") else: covmat = self.initial_proposal_covmat(slow_params=slow_params) self.log.info("Initial covariance matrix.") self.log.debug( "Sampling with covmat:\n%s", DataFrame( covmat, columns=self.model.parameterization.sampled_params(), index=self.model.parameterization.sampled_params()).to_string( line_width=_line_width)) self.proposer.set_covariance(covmat) # Prepare callback function if self.callback_function is not None: self.callback_function_callable = (get_external_function( self.callback_function))
def initialize(self): """Initializes the sampler: creates the proposal distribution and draws the initial sample.""" if not self.model.prior.d(): raise LoggedError(self.log, "No parameters being varied for sampler") self.log.debug("Initializing") # MARKED FOR DEPRECATION IN v3.0 if getattr(self, "oversample", None) is not None: self.log.warning( "*DEPRECATION*: `oversample` will be deprecated in the " "next version. Oversampling is now requested by setting " "`oversample_power` > 0.") # END OF DEPRECATION BLOCK # MARKED FOR DEPRECATION IN v3.0 if getattr(self, "check_every", None) is not None: self.log.warning( "*DEPRECATION*: `check_every` will be deprecated in the " "next version. Please use `learn_every` instead.") # BEHAVIOUR TO BE REPLACED BY ERROR: self.learn_every = getattr(self, "check_every") # END OF DEPRECATION BLOCK if self.callback_every is None: self.callback_every = self.learn_every self._quants_d_units = [] for q in ["max_tries", "learn_every", "callback_every", "burn_in"]: number = NumberWithUnits(getattr(self, q), "d", dtype=int) self._quants_d_units.append(number) setattr(self, q, number) self.output_every = NumberWithUnits(self.output_every, "s", dtype=int) if is_main_process(): if self.output.is_resuming() and (max(self.mpi_size or 0, 1) != max(get_mpi_size(), 1)): raise LoggedError( self.log, "Cannot resume a run with a different number of chains: " "was %d and now is %d.", max(self.mpi_size, 1), max(get_mpi_size(), 1)) if more_than_one_process(): if get_mpi().Get_version()[0] < 3: raise LoggedError( self.log, "MPI use requires MPI version 3.0 or " "higher to support IALLGATHER.") sync_processes() # One collection per MPI process: `name` is the MPI rank + 1 name = str(1 + (lambda r: r if r is not None else 0)(get_mpi_rank())) self.collection = Collection(self.model, self.output, name=name, resuming=self.output.is_resuming()) self.current_point = OneSamplePoint(self.model) # Use standard MH steps by default self.get_new_sample = self.get_new_sample_metropolis # Prepare callback function if self.callback_function is not None: self.callback_function_callable = (get_external_function( self.callback_function)) # Useful for getting last points added inside callback function self.last_point_callback = 0 # Monitoring/restore progress if is_main_process(): cols = [ "N", "timestamp", "acceptance_rate", "Rminus1", "Rminus1_cl" ] self.progress = DataFrame(columns=cols) self.i_learn = 1 if self.output and not self.output.is_resuming(): with open(self.progress_filename(), "w", encoding="utf-8") as progress_file: progress_file.write("# " + " ".join(self.progress.columns) + "\n") # Get first point, to be discarded -- not possible to determine its weight # Still, we need to compute derived parameters, since, as the proposal "blocked", # we may be saving the initial state of some block. # NB: if resuming but nothing was written (burn-in not finished): re-start if self.output.is_resuming() and len(self.collection): initial_point = (self.collection[ self.collection.sampled_params].iloc[len(self.collection) - 1]).values.copy() logpost = -(self.collection[_minuslogpost].iloc[ len(self.collection) - 1].copy()) logpriors = -(self.collection[self.collection.minuslogprior_names]. iloc[len(self.collection) - 1].copy()) loglikes = -0.5 * (self.collection[self.collection.chi2_names]. 
iloc[len(self.collection) - 1].copy()) derived = (self.collection[self.collection.derived_params].iloc[ len(self.collection) - 1].values.copy()) else: # NB: max_tries adjusted to dim instead of #cycles (blocking not computed yet) self.max_tries.set_scale(self.model.prior.d()) self.log.info( "Getting initial point... (this may take a few seconds)") initial_point, logpost, logpriors, loglikes, derived = \ self.model.get_valid_point(max_tries=self.max_tries.value) # If resuming but no existing chain, assume failed run and ignore blocking # if speeds measurement requested if self.output.is_resuming() and not len(self.collection) \ and self.measure_speeds: self.blocking = None if self.measure_speeds and self.blocking: self.log.warning( "Parameter blocking manually fixed: speeds will not be measured." ) elif self.measure_speeds: n = None if self.measure_speeds is True else int( self.measure_speeds) self.model.measure_and_set_speeds(n=n, discard=0) self.set_proposer_blocking() self.set_proposer_covmat(load=True) self.current_point.add(initial_point, derived=derived, logpost=logpost, logpriors=logpriors, loglikes=loglikes) self.log.info("Initial point: %s", self.current_point) # Max #(learn+convergence checks) to wait, # in case one process dies without sending MPI_ABORT self.been_waiting = 0 self.max_waiting = max(50, self.max_tries.unit_value) # Burning-in countdown -- the +1 accounts for the initial point (always accepted) self.burn_in_left = self.burn_in.value * self.current_point.output_thin + 1 # Initial dummy checkpoint # (needed when 1st "learn point" not reached in prev. run) self.write_checkpoint()
def initialize(self): """Initializes the sampler: creates the proposal distribution and draws the initial sample.""" if not self.model.prior.d(): raise LoggedError(self.log, "No parameters being varied for sampler") self.log.debug("Initializing") # MARKED FOR DEPRECATION IN v3.0 if getattr(self, "oversample", None) is not None: raise LoggedError( self.log, "`oversample` has been deprecated. " "Oversampling is now requested by setting " "`oversample_power` > 0.") # END OF DEPRECATION BLOCK # MARKED FOR DEPRECATION IN v3.0 if getattr(self, "check_every", None) is not None: raise LoggedError( self.log, "`check_every` has been deprecated. " "Please use `learn_every` instead.") # END OF DEPRECATION BLOCK if self.callback_every is None: self.callback_every = self.learn_every self._quants_d_units = [] for q in ["max_tries", "learn_every", "callback_every", "burn_in"]: number = NumberWithUnits(getattr(self, q), "d", dtype=int) self._quants_d_units.append(number) setattr(self, q, number) self.output_every = NumberWithUnits(self.output_every, "s", dtype=int) if is_main_process(): if self.output.is_resuming() and (max(self.mpi_size or 0, 1) != mpi.size()): raise LoggedError( self.log, "Cannot resume a run with a different number of chains: " "was %d and now is %d.", max(self.mpi_size or 0, 1), mpi.size()) sync_processes() # One collection per MPI process: `name` is the MPI rank + 1 name = str(1 + mpi.rank()) self.collection = SampleCollection(self.model, self.output, name=name, resuming=self.output.is_resuming()) self.current_point = OneSamplePoint(self.model) # Use standard MH steps by default self.get_new_sample = self.get_new_sample_metropolis # Prepare callback function if self.callback_function: self.callback_function_callable = (get_external_function( self.callback_function)) # Useful for getting last points added inside callback function self.last_point_callback = 0 self.i_learn = 1 # Monitoring/restore progress if is_main_process(): cols = [ "N", "timestamp", "acceptance_rate", "Rminus1", "Rminus1_cl" ] self.progress = DataFrame(columns=cols) if self.output and not self.output.is_resuming(): header_fmt = { "N": 6 * " " + "N", "timestamp": 17 * " " + "timestamp" } with open(self.progress_filename(), "w", encoding="utf-8") as progress_file: progress_file.write("# " + " ".join([ header_fmt.get(col, ((7 + 8) - len(col)) * " " + col) for col in self.progress.columns ]) + "\n") # Get first point, to be discarded -- not possible to determine its weight # Still, we need to compute derived parameters, since, as the proposal "blocked", # we may be saving the initial state of some block. # NB: if resuming but nothing was written (burn-in not finished): re-start if self.output.is_resuming() and len(self.collection): last = len(self.collection) - 1 initial_point = (self.collection[ self.collection.sampled_params].iloc[last]).to_numpy( dtype=np.float64, copy=True) results = LogPosterior( logpost=-self.collection[OutPar.minuslogpost].iloc[last], logpriors=-(self.collection[ self.collection.minuslogprior_names].iloc[last].to_numpy( dtype=np.float64, copy=True)), loglikes=-0.5 * (self.collection[self.collection.chi2_names].iloc[last]. to_numpy(dtype=np.float64, copy=True)), derived=(self.collection[ self.collection.derived_params].iloc[last].to_numpy( dtype=np.float64, copy=True))) else: # NB: max_tries adjusted to dim instead of #cycles (blocking not computed yet) self.max_tries.set_scale(self.model.prior.d()) self.log.info( "Getting initial point... 
(this may take a few seconds)") initial_point, results = \ self.model.get_valid_point(max_tries=self.max_tries.value, random_state=self._rng) # If resuming but no existing chain, assume failed run and ignore blocking # if speeds measurement requested if self.output.is_resuming() and not len(self.collection) \ and self.measure_speeds: self.blocking = None if self.measure_speeds and self.blocking: self.mpi_warning( "Parameter blocking manually fixed: speeds will not be measured." ) elif self.measure_speeds: n = None if self.measure_speeds is True else int( self.measure_speeds) self.model.measure_and_set_speeds(n=n, discard=0, random_state=self._rng) self.set_proposer_blocking() self.set_proposer_initial_covmat(load=True) self.current_point.add(initial_point, results) self.log.info("Initial point: %s", self.current_point) # Max #(learn+convergence checks) to wait, # in case one process dies/hangs without raising error self.been_waiting = 0 self.max_waiting = max(50, self.max_tries.unit_value) # Burning-in countdown -- the +1 accounts for the initial point (always accepted) self.burn_in_left = self.burn_in.value * self.current_point.output_thin + 1 self._msg_ready = ("Ready to check convergence" + (" and learn a new proposal covmat" if self.learn_proposal else "")) # Initial dummy checkpoint # (needed when 1st "learn point" not reached in prev. run) self.write_checkpoint()
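# Hedged usage sketch for the options parsed in the initializer above: quantities
# wrapped in NumberWithUnits with the "d" unit accept dimension-scaled strings such
# as "40d" (40 x number of sampled parameters), while `output_every` uses the "s"
# (seconds) unit. The callback receives the sampler instance; its body here is
# illustrative only, and the values shown are assumptions, not recommended defaults.
def my_callback(sampler):
    print("Points accepted so far:", len(sampler.collection))

mcmc_options = {
    "max_tries": "40d",       # scaled with dimension ("d" unit)
    "learn_every": "40d",
    "burn_in": 0,
    "output_every": "60s",    # seconds, per the NumberWithUnits("s") call above
    "callback_function": my_callback,
    "callback_every": "10d",
}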
def __init__(self, parameterization, info_prior=None): """ Initializes the prior and reference pdf's from the input information. """ constant_params_info = parameterization.constant_params() sampled_params_info = parameterization.sampled_params_info() if not sampled_params_info: log.warning("No sampled parameters requested! " "This will fail for non-mock samplers.") # pdf: a list of independent components # in principle, separable: one per parameter self.name = [] self.pdf = [] self.ref_pdf = [] self._bounds = np.zeros((len(sampled_params_info), 2)) for i, p in enumerate(sampled_params_info): self.name += [p] prior = sampled_params_info[p].get(_prior) self.pdf += [get_scipy_1d_pdf({p: prior})] fast_logpdf = fast_logpdfs.get(self.pdf[-1].dist.name) if fast_logpdf: self.pdf[-1].logpdf = MethodType(fast_logpdf, self.pdf[-1]) # Get the reference (1d) pdf ref = sampled_params_info[p].get(_p_ref) # Cases: number, pdf (something, but not a number), nothing if isinstance(ref, numbers.Number): self.ref_pdf += [float(ref)] elif ref is not None: self.ref_pdf += [get_scipy_1d_pdf({p: ref})] else: self.ref_pdf += [np.nan] self._bounds[i] = [-np.inf, np.inf] try: self._bounds[i] = self.pdf[-1].interval(1) except AttributeError: log.error( "No bounds defined for parameter '%s' " "(maybe not a scipy 1d pdf).", p) raise HandledException # Process the external prior(s): self.external = odict() for name in (info_prior if info_prior else {}): if name == _prior_1d_name: log.error( "The name '%s' is a reserved prior name. " "Please use a different one.", _prior_1d_name) raise HandledException log.debug("Loading external prior '%s' from: '%s'", name, info_prior[name]) self.external[name] = ({ "logp": get_external_function(info_prior[name], name=name) }) self.external[name]["argspec"] = (getargspec( self.external[name]["logp"])) self.external[name]["params"] = { p: list(sampled_params_info).index(p) for p in self.external[name]["argspec"].args if p in sampled_params_info } self.external[name]["constant_params"] = { p: constant_params_info[p] for p in self.external[name]["argspec"].args if p in constant_params_info } if (not (len(self.external[name]["params"]) + len(self.external[name]["constant_params"]))): log.error( "None of the arguments of the external prior '%s' " "are known *fixed* or *sampled* parameters. " "This prior recognizes: %r", name, self.external[name]["argspec"].args) raise HandledException params_without_default = self.external[name]["argspec"].args[:( len(self.external[name]["argspec"].args) - len(self.external[name]["argspec"].defaults or []))] if not all([(p in self.external[name]["params"] or p in self.external[name]["constant_params"]) for p in params_without_default]): log.error( "Some of the arguments of the external prior '%s' " "cannot be found and don't have a default value either: %s", name, list( set(params_without_default).difference( self.external[name]["params"]).difference( self.external[name]["constant_params"]))) raise HandledException log.warning( "External prior '%s' loaded. " "Mind that it might not be normalized!", name)
def __init__(self, parameterization: Parameterization, info_prior: Optional[PriorsDict] = None): """ Initializes the prior and reference pdf's from the input information. """ self.set_logger() self._parameterization = parameterization sampled_params_info = parameterization.sampled_params_info() # pdf: a list of independent components # in principle, separable: one per parameter self.params = [] self.pdf = [] self._bounds = np.zeros((len(sampled_params_info), 2)) for i, p in enumerate(sampled_params_info): self.params += [p] prior = sampled_params_info[p].get("prior") self.pdf += [get_scipy_1d_pdf({p: prior})] fast_logpdf = fast_logpdfs.get(self.pdf[-1].dist.name) if fast_logpdf: self.pdf[-1].logpdf = MethodType(fast_logpdf, self.pdf[-1]) self._bounds[i] = [-np.inf, np.inf] try: self._bounds[i] = self.pdf[-1].interval(1) except AttributeError: raise LoggedError( self.log, "No bounds defined for parameter '%s' " "(maybe not a scipy 1d pdf).", p) self._uniform_indices = np.array([ i for i, pdf in enumerate(self.pdf) if pdf.dist.name == 'uniform' ], dtype=int) self._non_uniform_indices = np.array([ i for i in range(len(self.pdf)) if i not in self._uniform_indices ], dtype=int) self._non_uniform_logpdf = [ self.pdf[i].logpdf for i in self._non_uniform_indices ] self._upper_limits = self._bounds[:, 1].copy() self._lower_limits = self._bounds[:, 0].copy() self._uniform_logp = -np.sum( np.log(self._upper_limits[self._uniform_indices] - self._lower_limits[self._uniform_indices])) # Set the reference pdf's self.set_reference( {p: v.get("ref") for p, v in sampled_params_info.items()}) # Process the external prior(s): self.external = {} self.external_dependence = set() info_prior = info_prior or {} for name in info_prior: if name == prior_1d_name: raise LoggedError( self.log, "The name '%s' is a reserved prior name. " "Please use a different one.", prior_1d_name) self.log.debug("Loading external prior '%s' from: '%s'", name, info_prior[name]) logp = get_external_function(info_prior[name], name=name) argspec = getfullargspec(logp) known = set(parameterization.input_params()) params = [p for p in argspec.args if p in known] params_without_default = set( argspec.args[:(len(argspec.args) - len(argspec.defaults or []))]) unknown = params_without_default - known if unknown: if unknown.intersection(parameterization.derived_params()): err = ( "External prior '%s' has arguments %s that are output derived " "parameters, Priors must be functions of input parameters. " "Use a separate 'likelihood' for the prior if needed.") else: err = ( "Some of the arguments of the external prior '%s' cannot be " "found and don't have a default value either: %s") raise LoggedError(self.log, err, name, list(unknown)) self.external_dependence.update(params) self.external[name] = ExternalPrior(logp=logp, params=params) self.mpi_warning( "External prior '%s' loaded. " "Mind that it might not be normalized!", name) parameterization.check_dropped(self.external_dependence)
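# Hedged sketch of the external-prior input processed by the constructor above: each
# entry under "prior" is a callable (or lambda string) returning a log-density, and
# its argument names must be *input* parameters (derived parameters are rejected, as
# the error branch above shows). Parameter names are assumptions for illustration.
info_prior_example = {
    "gaussian_link": "lambda x, y: -0.5 * ((x - y) / 0.1) ** 2",
}
# Typically passed inside the full input dict as: {"prior": info_prior_example, ...}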
def __init__(self, info_params: Union[ParamsDict, ExpandedParamsDict], allow_renames=True, ignore_unused_sampled=False): self.set_logger() self.allow_renames = allow_renames # First, we load the parameters, # not caring about whether they are understood by any likelihood. # `input` contains the parameters (expected to be) understood by the likelihood, # with its fixed value, its fixing function, or None if their value is given # directly by the sampler. self._infos = {} self._input: ParamValuesDict = {} self._input_funcs = {} self._input_args = {} self._input_dependencies: Dict[str, Set[str]] = {} self._dropped: Set[str] = set() self._output: ParamValuesDict = {} self._constant: ParamValuesDict = {} self._sampled: ParamValuesDict = {} self._sampled_renames: Dict[str, List[str]] = {} self._derived: ParamValuesDict = {} self._derived_inputs = [] self._derived_funcs = {} self._derived_args = {} self._derived_dependencies: Dict[str, Set[str]] = {} # Notice here that expand_info_param *always* adds a "derived":True tag # to infos without "prior" or "value", and a "value" field # to fixed params for p, info in info_params.items(): if isinstance(info, Mapping) and not set(info).issubset(partags): raise LoggedError(self.log, "Parameter '%s' has unknown options %s", p, set(info).difference(partags)) info = expand_info_param(info) self._infos[p] = info if is_fixed_or_function_param(info): if isinstance(info["value"], Real): self._constant[p] = float(info["value"]) self._input[p] = self._constant[p] if info.get("drop"): self._dropped.add(p) else: self._input[p] = np.nan self._input_funcs[p] = get_external_function(info["value"]) self._input_args[p] = getfullargspec( self._input_funcs[p]).args if is_sampled_param(info): self._sampled[p] = np.nan self._input[p] = np.nan if info.get("drop"): self._dropped.add(p) self._sampled_renames[p] = str_to_list( info.get("renames") or []) if is_derived_param(info): self._derived[p] = np.nan # Dynamical parameters whose value we want to save if info["derived"] is True and is_fixed_or_function_param( info): # parameters that are already known or computed by input funcs self._derived_inputs.append(p) elif info["derived"] is True: self._output[p] = np.nan else: self._derived_funcs[p] = get_external_function( info["derived"]) self._derived_args[p] = getfullargspec( self._derived_funcs[p]).args # Check that the sampled and derived params are all valid python variable names for p in chain(self._sampled, self._derived): if not is_valid_variable_name(p): is_in = p in self._sampled eg_in = " p_prime:\n prior: ...\n %s: " \ "'lambda p_prime: p_prime'\n" % p eg_out = " p_prime: 'lambda %s: %s'\n" % (p, p) raise LoggedError( self.log, "Parameter name '%s' is not a valid Python variable name " "(it needs to start with a letter or '_').\n" "If this is an %s parameter of a likelihood or theory, " "whose name you cannot change,%s define an associated " "%s one with a valid name 'p_prime' as: \n\n%s", p, "input" if is_in else "output", "" if is_in else " remove it and", "sampled" if is_in else "derived", eg_in if is_in else eg_out) # input params depend on input and sampled only, # never on output/derived unless constant known_input = set(self._input) all_input_arguments = set(chain(*self._input_args.values())) bad_input_dependencies = all_input_arguments - known_input if bad_input_dependencies: raise LoggedError( self.log, "Input parameters defined as functions can only depend on other " "input parameters. In particular, an input parameter cannot depend on %r." 
" Use an explicit Theory calculator for more complex dependencies.\n" "If you intended to define a derived output parameter use derived: " "instead of value:", list(bad_input_dependencies)) # Assume that the *un*known function arguments are likelihood/theory # output parameters for arg in (all_input_arguments.union(*self._derived_args.values()). difference(known_input).difference(self._derived)): self._output[arg] = np.nan # Useful set: directly "output-ed" derived self._directly_output = [p for p in self._derived if p in self._output] self._wrapped_input_funcs, self._wrapped_derived_funcs = \ self._get_wrapped_functions_evaluation_order() # Useful mapping: input params that vary if each sample is varied self._sampled_input_dependence = { s: [ i for i in self._input if s in self._input_dependencies.get(i, {}) ] for s in self._sampled } # From here on, some error control. # Only actually raise error after checking if used by prior. if not ignore_unused_sampled: self._dropped_not_directly_used = self._dropped.intersection( p for p, v in self._sampled_input_dependence.items() if not v) else: self._dropped_not_directly_used = set() # warn if repeated labels labels_inv_repeated = invert_dict(self.labels()) labels_inv_repeated = { k: v for k, v in labels_inv_repeated.items() if len(v) > 1 } if labels_inv_repeated: self.log.warning("There are repeated parameter labels: %r", labels_inv_repeated)
def __init__(self, info_params): # First, we load the parameters, # not caring about whether they are understood by any likelihood. # `input` contains the parameters (expected to be) understood by the likelihood, # with its fixed value, its fixing function, or None if their value is given # directly by the sampler. self._input = odict() self._input_funcs = dict() self._input_args = dict() self._output = odict() self._fixed = odict() self._sampled = odict() self._derived = odict() self._derived_funcs = dict() self._derived_args = dict() for p, info in info_params.items(): if is_fixed_param(info): self._input[p] = float(info) if isinstance(info, Number) else None if self._input[p] is None: self._input_funcs[p] = get_external_function(info) self._input_args[p] = getargspec(self._input_funcs[p]).args else: self._fixed[p] = self._input[p] if is_sampled_param(info): self._sampled[p] = info if not info.get(_p_drop, False): self._input[p] = None if is_derived_param(info): self._derived[p] = info if _p_derived in (info or {}): self._derived_funcs[p] = get_external_function(info[_p_derived]) self._derived_args[p] = getargspec(self._derived_funcs[p]).args else: self._output[p] = None # Check that the sampled and derived params are all valid python variable names def valid(name): try: parse("%s=None"%name) return True except SyntaxError: return False for p in chain(self.sampled_params(),self.derived_params()): if not valid(p): is_in = p in self.sampled_params() eg_in = " p_prime:\n prior: ...\n %s: 'lambda p_prime: p_prime'\n"%p eg_out = " p_prime: 'lambda %s: %s'\n"%(p,p) log.error("Parameter name '%s' is not a valid Python variable name " "(it needs to start with a letter or '_').\n" "If this is an %s parameter of a likelihood or theory, " "whose name you cannot change,%s define an associated " "%s one with a valid name 'p_prime' as: \n\n%s", p, "input" if is_in else "output", "" if is_in else " remove it and", "sampled" if is_in else "derived", eg_in if is_in else eg_out) raise HandledException # Assume that the *un*known function arguments are likelihood output parameters args = (set(chain(*self._input_args.values())) .union(chain(*self._derived_args.values()))) for p in list(self._input.keys()) + list(self._sampled.keys()) + list(self._output.keys()): if p in args: args.remove(p) self._output.update({p:None for p in args}) # Useful sets: directly-sampled input parameters and directly "output-ed" derived self._directly_sampled = [p for p in self._input if p in self._sampled] self._directly_output = [p for p in self._derived if p in self._output] # Useful mapping: input params that vary if each sampled is varied self._sampled_input_dependence = odict( [[s,[i for i in self._input if s in self._input_args.get(i, {})]] for s in self._sampled]) # From here on, some error control. dropped_but_never_used = ( set([p for p,v in self._sampled_input_dependence.items() if not v]) .difference(set(self._directly_sampled))) if dropped_but_never_used: log.error("Parameters %r are sampled but not passed to the likelihood or " "theory code, neither ever used as arguments for any parameters. 
" "Check that you are not using the '%s' tag unintentionally.", list(dropped_but_never_used), _p_drop) raise HandledException # input params depend on input and sampled only, never on output/derived bad_input_dependencies = set(chain(*self._input_args.values())).difference( set(self.input_params()).union(set(self.sampled_params()))) if bad_input_dependencies: log.error("Input parameters defined as functions can only depend on other " "input parameters that are not defined as functions. " "In particular, an input parameter cannot depend on %r", list(bad_input_dependencies)) raise HandledException # derived depend of input and output, never of sampled which are not input bad_derived_dependencies = set(chain(*self._derived_args.values())).difference( set(self.input_params()).union(set(self.output_params()))) if bad_derived_dependencies: log.error("Derived parameters can only depend on input and output parameters," " never on sampled parameters that have been defined as a function." " In particular, a derived parameter cannot depend on %r", list(bad_derived_dependencies)) raise HandledException
def __init__(self, parameterization: Parameterization, info_prior: Optional[PriorsDict] = None): """ Initializes the prior and reference pdf's from the input information. """ self.set_logger() self._parameterization = parameterization sampled_params_info = parameterization.sampled_params_info() # pdf: a list of independent components # in principle, separable: one per parameter self.params = [] self.pdf = [] self.ref_pdf = [] self._ref_is_pointlike = True self._bounds = np.zeros((len(sampled_params_info), 2)) for i, p in enumerate(sampled_params_info): self.params += [p] prior = sampled_params_info[p].get("prior") self.pdf += [get_scipy_1d_pdf({p: prior})] fast_logpdf = fast_logpdfs.get(self.pdf[-1].dist.name) if fast_logpdf: self.pdf[-1].logpdf = MethodType(fast_logpdf, self.pdf[-1]) # Get the reference (1d) pdf ref = sampled_params_info[p].get("ref") # Cases: number, pdf (something, but not a number), nothing if isinstance(ref, Sequence) and len(ref) == 2 and all( isinstance(n, numbers.Number) for n in ref): ref = {"dist": "norm", "loc": ref[0], "scale": ref[1]} if isinstance(ref, numbers.Real): self.ref_pdf += [float(ref)] elif isinstance(ref, Mapping): self.ref_pdf += [get_scipy_1d_pdf({p: ref})] self._ref_is_pointlike = False elif ref is None: self.ref_pdf += [np.nan] self._ref_is_pointlike = False else: raise LoggedError( self.log, "'ref' for starting position should be None or a number" ", a list of two numbers for normal mean and deviation," "or a dict with parameters for a scipy distribution.") self._bounds[i] = [-np.inf, np.inf] try: self._bounds[i] = self.pdf[-1].interval(1) except AttributeError: raise LoggedError( self.log, "No bounds defined for parameter '%s' " "(maybe not a scipy 1d pdf).", p) self._uniform_indices = np.array([ i for i, pdf in enumerate(self.pdf) if pdf.dist.name == 'uniform' ], dtype=int) self._non_uniform_indices = np.array([ i for i in range(len(self.pdf)) if i not in self._uniform_indices ], dtype=int) self._non_uniform_logpdf = [ self.pdf[i].logpdf for i in self._non_uniform_indices ] self._upper_limits = self._bounds[:, 1].copy() self._lower_limits = self._bounds[:, 0].copy() self._uniform_logp = -np.sum( np.log(self._upper_limits[self._uniform_indices] - self._lower_limits[self._uniform_indices])) # Process the external prior(s): self.external = {} self.external_dependence = set() info_prior = info_prior or {} for name in info_prior: if name == prior_1d_name: raise LoggedError( self.log, "The name '%s' is a reserved prior name. " "Please use a different one.", prior_1d_name) self.log.debug("Loading external prior '%s' from: '%s'", name, info_prior[name]) logp = get_external_function(info_prior[name], name=name) argspec = getfullargspec(logp) known = set(parameterization.input_params()) params = [p for p in argspec.args if p in known] params_without_default = set( argspec.args[:(len(argspec.args) - len(argspec.defaults or []))]) unknown = params_without_default - known if unknown: if unknown.intersection(parameterization.derived_params()): err = ( "External prior '%s' has arguments %s that are output derived " "parameters, Priors must be functions of input parameters. " "Use a separate 'likelihood' for the prior if needed.") else: err = ( "Some of the arguments of the external prior '%s' cannot be " "found and don't have a default value either: %s") raise LoggedError(self.log, err, name, list(unknown)) self.external_dependence.update(params) self.external[name] = ExternalPrior(logp=logp, params=params) self.mpi_warning( "External prior '%s' loaded. 
" "Mind that it might not be normalized!", name) parameterization.check_dropped(self.external_dependence)
def __init__(self, parameterization, info_prior=None): """ Initializes the prior and reference pdf's from the input information. """ self.set_logger() constant_params_info = parameterization.constant_params() sampled_params_info = parameterization.sampled_params_info() if not sampled_params_info: self.mpi_warning("No sampled parameters requested! " "This will fail for non-mock samplers.") # pdf: a list of independent components # in principle, separable: one per parameter self.params = [] self.pdf = [] self.ref_pdf = [] self._ref_is_pointlike = True self._bounds = np.zeros((len(sampled_params_info), 2)) for i, p in enumerate(sampled_params_info): self.params += [p] prior = sampled_params_info[p].get(_prior) self.pdf += [get_scipy_1d_pdf({p: prior})] fast_logpdf = fast_logpdfs.get(self.pdf[-1].dist.name) if fast_logpdf: self.pdf[-1].logpdf = MethodType(fast_logpdf, self.pdf[-1]) # Get the reference (1d) pdf ref = sampled_params_info[p].get(partag.ref) # Cases: number, pdf (something, but not a number), nothing if isinstance(ref, numbers.Real): self.ref_pdf += [float(ref)] elif ref is not None: self.ref_pdf += [get_scipy_1d_pdf({p: ref})] self._ref_is_pointlike = False else: self.ref_pdf += [np.nan] self._ref_is_pointlike = False self._bounds[i] = [-np.inf, np.inf] try: self._bounds[i] = self.pdf[-1].interval(1) except AttributeError: raise LoggedError( self.log, "No bounds defined for parameter '%s' " "(maybe not a scipy 1d pdf).", p) self._uniform_indices = np.array([ i for i, pdf in enumerate(self.pdf) if pdf.dist.name == 'uniform' ], dtype=int) self._non_uniform_indices = np.array([ i for i in range(len(self.pdf)) if i not in self._uniform_indices ], dtype=int) self._non_uniform_logpdf = [ self.pdf[i].logpdf for i in self._non_uniform_indices ] self._upper_limits = self._bounds[:, 1].copy() self._lower_limits = self._bounds[:, 0].copy() self._uniform_logp = -np.sum( np.log(self._upper_limits[self._uniform_indices] - self._lower_limits[self._uniform_indices])) # Process the external prior(s): self.external = {} for name in (info_prior if info_prior else {}): if name == _prior_1d_name: raise LoggedError( self.log, "The name '%s' is a reserved prior name. " "Please use a different one.", _prior_1d_name) self.log.debug("Loading external prior '%s' from: '%s'", name, info_prior[name]) opts = {"logp": get_external_function(info_prior[name], name=name)} self.external[name] = opts opts["argspec"] = (getfullargspec(opts["logp"])) opts["params"] = { p: list(sampled_params_info).index(p) for p in opts["argspec"].args if p in sampled_params_info } opts["constant_params"] = { p: constant_params_info[p] for p in opts["argspec"].args if p in constant_params_info } if (not (len(opts["params"]) + len(opts["constant_params"]))): raise LoggedError( self.log, "None of the arguments of the external prior '%s' " "are known *fixed* or *sampled* parameters. " "This prior recognizes: %r", name, opts["argspec"].args) params_without_default = opts["argspec"].args[:( len(opts["argspec"].args) - len(opts["argspec"].defaults or []))] if not all((p in opts["params"] or p in opts["constant_params"]) for p in params_without_default): raise LoggedError( self.log, "Some of the arguments of the external prior '%s' cannot " "be found and don't have a default value either: %s", name, list( set(params_without_default).difference( opts["params"]).difference( opts["constant_params"]))) self.mpi_warning( "External prior '%s' loaded. " "Mind that it might not be normalized!", name)
def __init__(self, info_params, allow_renames=True, ignore_unused_sampled=False): self.set_logger(lowercase=True) self.allow_renames = allow_renames # First, we load the parameters, # not caring about whether they are understood by any likelihood. # `input` contains the parameters (expected to be) understood by the likelihood, # with its fixed value, its fixing function, or None if their value is given # directly by the sampler. self._infos = odict() self._input = odict() self._input_funcs = dict() self._input_args = dict() self._output = odict() self._constant = odict() self._sampled = odict() self._sampled_renames = odict() self._derived = odict() self._derived_funcs = dict() self._derived_args = dict() # Notice here that expand_info_param *always* adds a _p_derived:True tag # to infos without _prior or _p_value, and a _p_value field to fixed params for p, info in info_params.items(): self._infos[p] = deepcopy(info) if is_fixed_param(info): if isinstance(info[_p_value], Number): self._constant[p] = info[_p_value] if not info.get(_p_drop, False): self._input[p] = self._constant[p] else: self._input[p] = None self._input_funcs[p] = get_external_function( info[_p_value]) self._input_args[p] = getargspec(self._input_funcs[p]).args if is_sampled_param(info): self._sampled[p] = None if not info.get(_p_drop, False): self._input[p] = None self._sampled_renames[p] = (( lambda x: [x] if isinstance(x, string_types) else x)(info.get( _p_renames, []))) if is_derived_param(info): self._derived[p] = deepcopy(info) # Dynamical parameters whose value we want to save if info[_p_derived] is True and is_fixed_param(info): info[_p_derived] = "lambda %s: %s" % (p, p) if info[_p_derived] is True: self._output[p] = None else: self._derived_funcs[p] = get_external_function( info[_p_derived]) self._derived_args[p] = getargspec( self._derived_funcs[p]).args # Check that the sampled and derived params are all valid python variable names for p in chain(self.sampled_params(), self.derived_params()): if not is_valid_variable_name(p): is_in = p in self.sampled_params() eg_in = " p_prime:\n prior: ...\n %s: 'lambda p_prime: p_prime'\n" % p eg_out = " p_prime: 'lambda %s: %s'\n" % (p, p) raise LoggedError( self.log, "Parameter name '%s' is not a valid Python variable name " "(it needs to start with a letter or '_').\n" "If this is an %s parameter of a likelihood or theory, " "whose name you cannot change,%s define an associated " "%s one with a valid name 'p_prime' as: \n\n%s", p, "input" if is_in else "output", "" if is_in else " remove it and", "sampled" if is_in else "derived", eg_in if is_in else eg_out) # Assume that the *un*known function arguments are likelihood output parameters args = (set(chain(*self._input_args.values())).union( chain(*self._derived_args.values()))) for p in (list(self._constant) + list(self._input) + list(self._sampled) + list(self._derived)): if p in args: args.remove(p) self._output.update({p: None for p in args}) # Useful sets: directly-sampled input parameters and directly "output-ed" derived self._directly_sampled = [p for p in self._input if p in self._sampled] self._directly_output = [p for p in self._derived if p in self._output] # Useful mapping: input params that vary if each sampled is varied self._sampled_input_dependence = odict( [[s, [i for i in self._input if s in self._input_args.get(i, {})]] for s in self._sampled]) # From here on, some error control. 
dropped_but_never_used = (set([ p for p, v in self._sampled_input_dependence.items() if not v ]).difference(set(self._directly_sampled))) if dropped_but_never_used and not ignore_unused_sampled: raise LoggedError( self.log, "Parameters %r are sampled but not passed to the likelihood or theory " "code, neither ever used as arguments for any parameters. " "Check that you are not using the '%s' tag unintentionally.", list(dropped_but_never_used), _p_drop) # input params depend on input and sampled only, never on output/derived all_input_arguments = set(chain(*self._input_args.values())) bad_input_dependencies = all_input_arguments.difference( set(self.input_params()).union(set(self.sampled_params())).union( set(self.constant_params()))) if bad_input_dependencies: raise LoggedError( self.log, "Input parameters defined as functions can only depend on other " "input parameters that are not defined as functions. " "In particular, an input parameter cannot depend on %r", list(bad_input_dependencies))
def initialize(self): """Imports the PolyChord sampler and prepares its arguments.""" if am_single_or_primary_process( ): # rank = 0 (MPI master) or None (no MPI) self.log.info("Initializing") # If path not given, try using general path to modules if not self.path and self.path_install: self.path = get_path(self.path_install) if self.path: if am_single_or_primary_process(): self.log.info("Importing *local* PolyChord from " + self.path) if not os.path.exists(os.path.realpath(self.path)): raise LoggedError( self.log, "The given path does not exist. " "Try installing PolyChord with " "'cobaya-install polychord -m [modules_path]") pc_build_path = get_build_path(self.path) if not pc_build_path: raise LoggedError( self.log, "Either PolyChord is not in the given folder, " "'%s', or you have not compiled it.", self.path) # Inserting the previously found path into the list of import folders sys.path.insert(0, pc_build_path) else: self.log.info("Importing *global* PolyChord.") try: import pypolychord from pypolychord.settings import PolyChordSettings self.pc = pypolychord except ImportError: raise LoggedError( self.log, "Couldn't find the PolyChord python interface. " "Make sure that you have compiled it, and that you either\n" " (a) specify a path (you didn't) or\n" " (b) install the Python interface globally with\n" " '/path/to/PolyChord/python setup.py install --user'") # Prepare arguments and settings self.nDims = self.model.prior.d() self.nDerived = (len(self.model.parameterization.derived_params()) + len(self.model.prior) + len(self.model.likelihood._likelihoods)) if self.logzero is None: self.logzero = np.nan_to_num(-np.inf) if self.max_ndead == np.inf: self.max_ndead = -1 for p in ["nlive", "nprior", "max_ndead"]: setattr(self, p, read_dnumber(getattr(self, p), self.nDims, dtype=int)) # Fill the automatic ones if getattr(self, "feedback", None) is None: values = { logging.CRITICAL: 0, logging.ERROR: 0, logging.WARNING: 0, logging.INFO: 1, logging.DEBUG: 2 } self.feedback = values[self.log.getEffectiveLevel()] try: output_folder = getattr(self.output, "folder") output_prefix = getattr(self.output, "prefix") or "" self.read_resume = self.resuming except AttributeError: # dummy output -- no resume! self.read_resume = False from tempfile import gettempdir output_folder = gettempdir() if am_single_or_primary_process(): from random import random output_prefix = hex(int(random() * 16**6))[2:] else: output_prefix = None if more_than_one_process(): output_prefix = get_mpi_comm().bcast(output_prefix, root=0) self.base_dir = os.path.join(output_folder, self.base_dir) self.file_root = output_prefix if am_single_or_primary_process(): # Creating output folder, if it does not exist (just one process) if not os.path.exists(self.base_dir): os.makedirs(self.base_dir) # Idem, a clusters folder if needed -- notice that PolyChord's default # is "True", here "None", hence the funny condition below if self.do_clustering is not False: # None here means "default" try: os.makedirs(os.path.join(self.base_dir, clusters)) except OSError: # exists! 
pass self.log.info("Storing raw PolyChord output in '%s'.", self.base_dir) # Exploiting the speed hierarchy if self.blocking: speeds, blocks = self.model.likelihood._check_speeds_of_params( self.blocking) else: speeds, blocks = self.model.likelihood._speeds_of_params( int_speeds=True) blocks_flat = list(chain(*blocks)) self.ordering = [ blocks_flat.index(p) for p in self.model.parameterization.sampled_params() ] self.grade_dims = np.array([len(block) for block in blocks]) # bugfix: pypolychord's C interface for Fortran does not like int numpy types self.grade_dims = [int(x) for x in self.grade_dims] # Steps per block # NB: num_repeats is ignored by PolyChord when int "grade_frac" given, # so needs to be applied by hand. # Make sure that speeds are integer, and that the slowest is 1, # for a straightforward application of num_repeats speeds = relative_to_int(speeds, 1) # In num_repeats, `d` is interpreted as dimension of each block self.grade_frac = [ int(speed * read_dnumber(self.num_repeats, dim_block)) for speed, dim_block in zip(speeds, self.grade_dims) ] # Assign settings pc_args = [ "nlive", "num_repeats", "nprior", "do_clustering", "precision_criterion", "max_ndead", "boost_posterior", "feedback", "logzero", "posteriors", "equals", "compression_factor", "cluster_posteriors", "write_resume", "read_resume", "write_stats", "write_live", "write_dead", "base_dir", "grade_frac", "grade_dims", "feedback", "read_resume", "base_dir", "file_root", "grade_frac", "grade_dims" ] # As stated above, num_repeats is ignored, so let's not pass it pc_args.pop(pc_args.index("num_repeats")) self.pc_settings = PolyChordSettings( self.nDims, self.nDerived, seed=(self.seed if self.seed is not None else -1), **{ p: getattr(self, p) for p in pc_args if getattr(self, p) is not None }) # prior conversion from the hypercube bounds = self.model.prior.bounds( confidence_for_unbounded=self.confidence_for_unbounded) # Check if priors are bounded (nan's to inf) inf = np.where(np.isinf(bounds)) if len(inf[0]): params_names = self.model.parameterization.sampled_params() params = [params_names[i] for i in sorted(list(set(inf[0])))] raise LoggedError( self.log, "PolyChord needs bounded priors, but the parameter(s) '" "', '".join(params) + "' is(are) unbounded.") locs = bounds[:, 0] scales = bounds[:, 1] - bounds[:, 0] # This function re-scales the parameters AND puts them in the right order self.pc_prior = lambda x: (locs + np.array(x)[self.ordering] * scales ).tolist() # We will need the volume of the prior domain, since PolyChord divides by it self.logvolume = np.log(np.prod(scales)) # Prepare callback function if self.callback_function is not None: self.callback_function_callable = (get_external_function( self.callback_function)) self.last_point_callback = 0 # Prepare runtime live and dead points collections self.live = Collection(self.model, None, name="live", initial_size=self.pc_settings.nlive) self.dead = Collection(self.model, self.output, name="dead") self.n_sampled = len(self.model.parameterization.sampled_params()) self.n_derived = len(self.model.parameterization.derived_params()) self.n_priors = len(self.model.prior) self.n_likes = len(self.model.likelihood._likelihoods) # Done! if am_single_or_primary_process(): self.log.info("Calling PolyChord with arguments:") for p, v in inspect.getmembers(self.pc_settings, lambda a: not (callable(a))): if not p.startswith("_"): self.log.info(" %s: %s", p, v)
def initialize(self):
    """Imports the PolyChord sampler and prepares its arguments."""
    # Allow global import if no direct path specification
    allow_global = not self.path
    if not self.path and self.packages_path:
        self.path = self.get_path(self.packages_path)
    self.pc = self.is_installed(path=self.path, allow_global=allow_global)
    if not self.pc:
        raise NotInstalledError(
            self.log, "Could not find PolyChord. Check error message above. "
                      "To install it, run 'cobaya-install polychord --%s "
                      "[packages_path]'", _packages_path_arg)
    # Prepare arguments and settings
    from pypolychord.settings import PolyChordSettings
    self.n_sampled = len(self.model.parameterization.sampled_params())
    self.n_derived = len(self.model.parameterization.derived_params())
    self.n_priors = len(self.model.prior)
    self.n_likes = len(self.model.likelihood)
    self.nDims = self.model.prior.d()
    self.nDerived = (self.n_derived + self.n_priors + self.n_likes)
    if self.logzero is None:
        self.logzero = np.nan_to_num(-np.inf)
    if self.max_ndead == np.inf:
        self.max_ndead = -1
    self._quants_d_units = ["nlive", "max_ndead"]
    for p in self._quants_d_units:
        if getattr(self, p) is not None:
            setattr(self, p, NumberWithUnits(
                getattr(self, p), "d", scale=self.nDims, dtype=int).value)
    self._quants_nlive_units = ["nprior"]
    for p in self._quants_nlive_units:
        if getattr(self, p) is not None:
            setattr(self, p, NumberWithUnits(
                getattr(self, p), "nlive", scale=self.nlive, dtype=int).value)
    # Fill the automatic ones
    if getattr(self, "feedback", None) is None:
        values = {logging.CRITICAL: 0, logging.ERROR: 0, logging.WARNING: 0,
                  logging.INFO: 1, logging.DEBUG: 2}
        self.feedback = values[self.log.getEffectiveLevel()]
    # Prepare output folders and prefixes
    if self.output:
        self.file_root = self.output.prefix
        self.read_resume = self.output.is_resuming()
    else:
        output_prefix = share_mpi(
            hex(int(random() * 16**6))[2:] if is_main_process() else None)
        self.file_root = output_prefix
        # dummy output -- no resume!
        self.read_resume = False
    self.base_dir = self.get_base_dir(self.output)
    self.raw_clusters_dir = os.path.join(self.base_dir, self._clusters_dir)
    self.output.create_folder(self.base_dir)
    if self.do_clustering:
        self.clusters_folder = self.get_clusters_dir(self.output)
        self.output.create_folder(self.clusters_folder)
    self.mpi_info("Storing raw PolyChord output in '%s'.", self.base_dir)
    # Exploiting the speed hierarchy
    if self.blocking:
        blocks, oversampling_factors = self.model.check_blocking(self.blocking)
    else:
        if self.measure_speeds:
            self.model.measure_and_set_speeds(n=self.measure_speeds)
        blocks, oversampling_factors = self.model.get_param_blocking_for_sampler(
            oversample_power=self.oversample_power)
    self.mpi_info("Parameter blocks and their oversampling factors:")
    max_width = len(str(max(oversampling_factors)))
    for f, b in zip(oversampling_factors, blocks):
        self.mpi_info("* %" + "%d" % max_width + "d : %r", f, b)
    # Save blocking in updated info, in case we want to resume
    self._updated_info["blocking"] = list(zip(oversampling_factors, blocks))
    blocks_flat = list(chain(*blocks))
    self.ordering = [blocks_flat.index(p)
                     for p in self.model.parameterization.sampled_params()]
    self.grade_dims = [len(block) for block in blocks]
    # Steps per block
    # NB: num_repeats is ignored by PolyChord when int "grade_frac" given,
    # so needs to be applied by hand.
    # In num_repeats, `d` is interpreted as dimension of each block
    self.grade_frac = [int(o * read_dnumber(self.num_repeats, dim_block))
                       for o, dim_block in zip(oversampling_factors, self.grade_dims)]
    # Assign settings
    pc_args = ["nlive", "num_repeats", "nprior", "do_clustering",
               "precision_criterion", "max_ndead", "boost_posterior", "feedback",
               "logzero", "posteriors", "equals", "compression_factor",
               "cluster_posteriors", "write_resume", "read_resume", "write_stats",
               "write_live", "write_dead", "base_dir", "grade_frac", "grade_dims",
               "feedback", "read_resume", "base_dir", "file_root", "grade_frac",
               "grade_dims"]
    # As stated above, num_repeats is ignored, so let's not pass it
    pc_args.pop(pc_args.index("num_repeats"))
    self.pc_settings = PolyChordSettings(
        self.nDims, self.nDerived,
        seed=(self.seed if self.seed is not None else -1),
        **{p: getattr(self, p) for p in pc_args if getattr(self, p) is not None})
    # prior conversion from the hypercube
    bounds = self.model.prior.bounds(
        confidence_for_unbounded=self.confidence_for_unbounded)
    # Check if priors are bounded (nan's to inf)
    inf = np.where(np.isinf(bounds))
    if len(inf[0]):
        params_names = self.model.parameterization.sampled_params()
        params = [params_names[i] for i in sorted(list(set(inf[0])))]
        raise LoggedError(
            self.log,
            "PolyChord needs bounded priors, but the parameter(s) '" +
            "', '".join(params) + "' is(are) unbounded.")
    locs = bounds[:, 0]
    scales = bounds[:, 1] - bounds[:, 0]
    # This function re-scales the parameters AND puts them in the right order
    self.pc_prior = lambda x: (locs + np.array(x)[self.ordering] * scales).tolist()
    # We will need the volume of the prior domain, since PolyChord divides by it
    self.logvolume = np.log(np.prod(scales))
    # Prepare callback function
    if self.callback_function is not None:
        self.callback_function_callable = (
            get_external_function(self.callback_function))
        self.last_point_callback = 0
    # Prepare runtime live and dead points collections
    self.live = Collection(self.model, None, name="live",
                           initial_size=self.pc_settings.nlive)
    self.dead = Collection(self.model, self.output, name="dead")
    # Done!
    if is_main_process():
        self.log.debug("Calling PolyChord with arguments:")
        for p, v in inspect.getmembers(self.pc_settings, lambda a: not callable(a)):
            if not p.startswith("_"):
                self.log.debug("  %s: %s", p, v)
    self.mpi_info("Initialized!")
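
Because PolyChord ignores num_repeats whenever an explicit grade_frac is passed, both implementations fold the repeats into grade_frac by hand: a "d"-unit num_repeats is resolved against each block's dimension and multiplied by that block's oversampling factor (or integer speed, in the older version). The sketch below uses assumed numbers and a stand-in helper instead of Cobaya's read_dnumber / NumberWithUnits.

# Illustrative sketch (assumed numbers, hypothetical helper) of how grade_frac
# is assembled from per-block oversampling factors and a "d"-unit num_repeats.
oversampling_factors = [1, 4]   # hypothetical: one slow block, one fast block
grade_dims = [6, 2]             # hypothetical block dimensions
num_repeats = "2d"              # i.e. 2 * (dimension of each block)

def resolve_d_units(value, dim):
    # Stand-in for read_dnumber: "2d" -> 2 * dim; plain ints pass through.
    if isinstance(value, str) and value.endswith("d"):
        return int(float(value[:-1]) * dim)
    return int(value)

grade_frac = [o * resolve_d_units(num_repeats, dim)
              for o, dim in zip(oversampling_factors, grade_dims)]
print(grade_frac)  # -> [12, 16]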
def initialise(self):
    """Initialises the sampler: creates the proposal distribution and
    draws the initial sample."""
    self.log.info("Initializing")
    # Burning-in countdown -- the +1 accounts for the initial point (always accepted)
    self.burn_in_left = self.burn_in + 1
    # One collection per MPI process: `name` is the MPI rank + 1
    name = str(1 + (lambda r: r if r is not None else 0)(get_mpi_rank()))
    self.collection = Collection(self.parametrization, self.likelihood,
                                 self.output, name=name)
    self.current_point = OnePoint(self.parametrization, self.likelihood,
                                  self.output, name=name)
    # Use the standard steps by default
    self.get_new_sample = self.get_new_sample_metropolis
    # Prepare oversampling / fast-dragging if applicable
    self.effective_max_samples = self.max_samples
    if self.oversample and self.drag:
        self.log.error("Choose either oversampling or fast-dragging, not both.")
        raise HandledException
    # if (self.oversample or self.drag) and len(set(factors)) == 1:
    #     self.log.error("All block speeds are similar: "
    #                    "no dragging or oversampling possible.")
    #     raise HandledException
    if self.oversample:
        factors, blocks = self.likelihood.speeds_of_params(oversampling_factors=True)
        self.oversampling_factors = factors
        # WIP: actually, we would have to re-normalise to the dimension of the blocks.
        self.log.info("Oversampling with factors:\n" + "\n".join([
            "   %d : %r" % (f, b)
            for f, b in zip(self.oversampling_factors, blocks)]))
        # WIP: useless until likelihoods have STATES!
        self.log.error("Sorry, oversampling is WIP")
        raise HandledException
    elif self.drag:
        # WIP: for now, can only separate between theory and likelihoods
        # until likelihoods have states
        if not self.likelihood.theory:
            self.log.error("WIP: dragging disabled for now when no theory code present.")
            raise HandledException
        # if self.max_speed_slow < min(speeds) or self.max_speed_slow >= max(speeds):
        #     self.log.error("The maximum speed considered slow, `max_speed_slow`, "
        #                    "must be %g <= `max_speed_slow` < %g, and is %g",
        #                    min(speeds), max(speeds), self.max_speed_slow)
        #     raise HandledException
        speeds, blocks = self.likelihood.speeds_of_params(int_speeds=True,
                                                          fast_slow=True)
        if np.all(np.array(speeds) == speeds[0]):
            self.log.error("All speeds are equal: cannot drag! Make sure to define, "
                           "especially, the speed of the fastest likelihoods.")
            raise HandledException
        self.i_last_slow_block = 0  # just theory can be slow for now
        fast_params = list(chain(*blocks[1 + self.i_last_slow_block:]))
        self.n_slow = sum(len(blocks[i]) for i in range(1 + self.i_last_slow_block))
        self.drag_interp_steps = int(self.drag * np.round(min(speeds[1:]) / speeds[0]))
        self.log.info("Dragging with oversampling per step:\n" + "\n".join([
            "   %d : %r" % (f, b)
            for f, b in zip([1, self.drag_interp_steps],
                            [blocks[0], fast_params])]))
        self.get_new_sample = self.get_new_sample_dragging
    else:
        _, blocks = self.likelihood.speeds_of_params()
        self.oversampling_factors = [1 for b in blocks]
        self.n_slow = len(self.parametrization.sampled_params())
    # Turn parameter names into indices
    blocks = [[list(self.parametrization.sampled_params().keys()).index(p)
               for p in b] for b in blocks]
    self.proposer = BlockedProposer(
        blocks, oversampling_factors=getattr(self, "oversampling_factors", None),
        i_last_slow_block=getattr(self, "i_last_slow_block", None),
        propose_scale=self.propose_scale)
    # Build the initial covariance matrix of the proposal
    covmat = self.initial_proposal_covmat()
    self.log.info("Sampling with covariance matrix:")
    self.log.info("%r", covmat)
    self.proposer.set_covariance(covmat)
    # Prepare callback function
    if self.callback_function is not None:
        self.callback_function_callable = (
            get_external_function(self.callback_function))
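
In the fast-dragging branch of initialise(), the number of interpolating fast steps per slow step is the `drag` option times the speed ratio between the slowest of the fast blocks and the slow (theory) block. A small numeric illustration with assumed speeds, not values taken from the code above:

# Rough illustration (assumed speeds) of the drag_interp_steps computation.
import numpy as np

speeds = [1, 20, 40]  # hypothetical: slow theory block, then two fast likelihood blocks
drag = 1.0            # hypothetical value of the `drag` option

drag_interp_steps = int(drag * np.round(min(speeds[1:]) / speeds[0]))
print(drag_interp_steps)  # -> 20 fast sub-steps per slow step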