def close(self, *args):
    """
    Determines success (or not), chooses best (if MPI) and produces output
    (if requested).

    Does nothing when no ``result`` attribute has been set. When
    ``get_mpi_size()`` is truthy, all results are gathered on root and the
    best one is selected there. Only processes for which ``get_mpi_rank()``
    is falsy validate the result and store it in ``self.maximum``.

    :param args: ignored.
    :raises HandledException: if the selected maximization did not succeed,
        or if re-evaluating the posterior at the maximum does not reproduce
        the optimizer's value (checked with ``np.allclose`` defaults).
    """
    # If something failed
    if not hasattr(self, "result"):
        return
    if get_mpi_size():
        results = get_mpi_comm().gather(self.result, root=0)
        if not get_mpi_rank():
            # Choose among converged runs only: a failed run may report a
            # lower `fun` and would otherwise mask a successful one from
            # another process. If nothing converged, keep the overall lowest
            # so that the failure report below shows it.
            candidates = [r for r in results if r.success] or results
            self.result = candidates[np.argmin([r.fun for r in candidates])]
    if get_mpi_rank():
        return
    if not self.result.success:
        self.log.error(
            "Maximization failed! Here is the `scipy` raw result:\n%r",
            self.result)
        raise HandledException
    self.log.info("log%s maximized at %g",
                  "likelihood" if self.ignore_prior else "posterior",
                  -self.result.fun)
    # Re-evaluate at the optimum to check that the reported value is real
    post = self.model.logposterior(self.result.x)
    recomputed_max = sum(post.loglikes) if self.ignore_prior else post.logpost
    if not np.allclose(-self.result.fun, recomputed_max):
        self.log.error("Cannot reproduce result. Something bad happened. "
                       "Recomputed max: %g at %r",
                       recomputed_max, self.result.x)
        raise HandledException
    self.maximum = OnePoint(
        self.model, self.output, name="maximum",
        extension=("likelihood" if self.ignore_prior else "posterior"))
    self.maximum.add(self.result.x, derived=post.derived,
                     logpost=post.logpost, logpriors=post.logpriors,
                     loglikes=post.loglikes)
    # Lazy logging arguments instead of eager %-formatting
    self.log.info("Parameter values at maximum:\n%s",
                  self.maximum.data.to_string())
    self.maximum._out_update()
def process_results(self, results, successes, affine_transform_baselines,
                    transform_matrices):
    """
    Determines success (or not), chooses best (if MPI or multiple starts)
    and produces output (if requested).

    Each argument is an iterable of iterables that is flattened first; the
    four flattened sequences are indexed in parallel. The selected result,
    baseline and transform matrix are stored on ``self``, the minimum is
    re-evaluated with ``cached=False`` and stored in ``self.minimum``.

    :raises LoggedError: if no minimization succeeded, or if the recomputed
        log-probability differs from the minimizer's value (``atol=1e-2``).
    """
    evals_attr_ = evals_attr[self.method.lower()]
    # Flatten the nested inputs, in the same order as they are passed
    results = list(chain.from_iterable(results))
    successes = list(chain.from_iterable(successes))
    affine_transform_baselines = list(
        chain.from_iterable(affine_transform_baselines))
    transform_matrices = list(chain.from_iterable(transform_matrices))
    several = len(results) > 1
    if several:
        # Failed runs get +inf so that they can never be selected as best
        mins = []
        for res, ok in zip(results, successes):
            mins.append(getattr(res, evals_attr_) if ok else np.inf)
        best = np.argmin(mins)
    else:
        best = 0
    self.result = results[best]
    self._affine_transform_baseline = affine_transform_baselines[best]
    self._inv_affine_transform_matrix = transform_matrices[best]
    if not any(successes):
        raise LoggedError(
            self.log,
            "Minimization failed! Here is the raw result object:\n%s",
            str(self.result))
    if not all(successes):
        self.log.warning('Some minimizations failed!')
    elif several:
        self.log.info('Finished successfully!')
        spread = max(mins) - min(mins)
        if spread > 1:
            self.log.warning('Big spread in minima: %r', mins)
        elif spread > 0.2:
            self.log.warning('Modest spread in minima: %r', mins)
    logp_min = -np.array(getattr(self.result, evals_attr_))
    x_min = self.inv_affine_transform(self.result.x)
    target = "likelihood" if self.ignore_prior else "posterior"
    self.log.info("-log(%s) minimized to %g", target, -logp_min)
    # Re-evaluate, bypassing the cache, to check the reported minimum
    post = self.model.logposterior(x_min, cached=False)
    if self.ignore_prior:
        recomputed_logp_min = sum(post.loglikes)
    else:
        recomputed_logp_min = post.logpost
    if not np.allclose(logp_min, recomputed_logp_min, atol=1e-2):
        raise LoggedError(
            self.log,
            "Cannot reproduce log minimum to within 0.01. Maybe your "
            "likelihood is stochastic or large numerical error? "
            "Recomputed min: %g (was %g) at %r",
            recomputed_logp_min, logp_min, x_min)
    self.minimum = OnePoint(
        self.model, self.output, name="",
        extension=get_collection_extension(self.ignore_prior))
    self.minimum.add(x_min, derived=post.derived, logpost=post.logpost,
                     logpriors=post.logpriors, loglikes=post.loglikes)
    self.log.info("Parameter values at minimum:\n%s",
                  self.minimum.data.to_string())
    self.minimum.out_update()
    self.dump_getdist()
def close(self, *args):
    """
    Determines success (or not), chooses best (if MPI) and produces output
    (if requested).

    Does nothing when no ``result`` attribute has been set. When
    ``get_mpi_size()`` is truthy, results, success flags and affine
    transforms are gathered on root, and the primary process keeps the
    successful run with the lowest value. The primary process then
    re-evaluates the posterior at the minimum (``cached=False``), stores it
    in ``self.minimum`` and writes the output.

    :param args: ignored.
    :raises LoggedError: if the selected minimization did not succeed, or if
        the recomputed log-probability does not match the minimizer's value
        (checked with ``np.allclose`` defaults).
    """
    # If something failed -- checked first, so that cleanup after a failed
    # run cannot itself fail on the method lookup below.
    if not hasattr(self, "result"):
        return
    evals_attr_ = evals_attr[self.method.lower()]
    success = self.success
    if get_mpi_size():
        comm = get_mpi_comm()
        results = comm.gather(self.result, root=0)
        # Success must be judged for the run that is actually selected, not
        # for the primary's own run, so the flags are gathered as well.
        successes = comm.gather(success, root=0)
        _inv_affine_transform_matrices = comm.gather(
            self._inv_affine_transform_matrix, root=0)
        _affine_transform_baselines = comm.gather(
            self._affine_transform_baseline, root=0)
        if am_single_or_primary_process():
            # Failed runs get +inf so they only win if every run failed
            mins = [(getattr(r, evals_attr_) if s else np.inf)
                    for r, s in zip(results, successes)]
            i_min = np.argmin(mins)
            self.result = results[i_min]
            success = successes[i_min]
            self._inv_affine_transform_matrix = (
                _inv_affine_transform_matrices[i_min])
            self._affine_transform_baseline = (
                _affine_transform_baselines[i_min])
    if not am_single_or_primary_process():
        return
    if not success:
        raise LoggedError(
            self.log,
            "Minimization failed! Here is the raw result object:\n%s",
            str(self.result))
    logp_min = -np.array(getattr(self.result, evals_attr_))
    x_min = self.inv_affine_transform(self.result.x)
    self.log.info("-log(%s) minimized to %g",
                  "likelihood" if self.ignore_prior else "posterior",
                  -logp_min)
    # Re-evaluate, bypassing the cache, to check the reported minimum
    recomputed_post_min = self.model.logposterior(x_min, cached=False)
    recomputed_logp_min = (sum(recomputed_post_min.loglikes)
                           if self.ignore_prior
                           else recomputed_post_min.logpost)
    if not np.allclose(logp_min, recomputed_logp_min):
        raise LoggedError(
            self.log,
            "Cannot reproduce result. Maybe your likelihood is stochastic? "
            "Recomputed min: %g (was %g) at %r",
            recomputed_logp_min, logp_min, x_min)
    self.minimum = OnePoint(
        self.model, self.output, name="",
        extension=("bestfit.txt" if self.ignore_prior else "minimum.txt"))
    self.minimum.add(x_min, derived=recomputed_post_min.derived,
                     logpost=recomputed_post_min.logpost,
                     logpriors=recomputed_post_min.logpriors,
                     loglikes=recomputed_post_min.loglikes)
    self.log.info("Parameter values at minimum:\n%s",
                  self.minimum.data.to_string())
    self.minimum._out_update()
    self.dump_getdist()
def process_results(self):
    """
    Determines success (or not), chooses best (if MPI) and produces output
    (if requested).

    With more than one process, results, success flags and affine-transform
    baselines are gathered on root and the main process keeps the successful
    run with the lowest value. The main process then re-evaluates the
    posterior at the minimum (``cached=False``), stores it in
    ``self.minimum`` and writes the output. Returns early if no ``result``
    attribute has been set.

    :raises LoggedError: if no minimization succeeded, or if the recomputed
        log-probability differs from the minimizer's value (``atol=1e-2``).
    """
    evals_attr_ = evals_attr[self.method.lower()]
    # Nothing to process if the run never produced a result
    if not hasattr(self, "result"):
        return
    if more_than_one_process():
        results = get_mpi_comm().gather(self.result, root=0)
        successes = get_mpi_comm().gather(self.success, root=0)
        baselines = get_mpi_comm().gather(
            self._affine_transform_baseline, root=0)
        if is_main_process():
            # Failed runs get +inf so that they can never be selected as best
            mins = []
            for res, ok in zip(results, successes):
                mins.append(getattr(res, evals_attr_) if ok else np.inf)
            best = np.argmin(mins)
            self.result = results[best]
            self._affine_transform_baseline = baselines[best]
    else:
        successes = [self.success]
    # Everything below is done by the main process only
    if not is_main_process():
        return
    if not any(successes):
        raise LoggedError(
            self.log,
            "Minimization failed! Here is the raw result object:\n%s",
            str(self.result))
    if not all(successes):
        self.log.warning('Some minimizations failed!')
    elif more_than_one_process():
        spread = max(mins) - min(mins)
        if spread > 1:
            self.log.warning('Big spread in minima: %r', mins)
        elif spread > 0.2:
            self.log.warning('Modest spread in minima: %r', mins)
    logp_min = -np.array(getattr(self.result, evals_attr_))
    x_min = self.inv_affine_transform(self.result.x)
    target = "likelihood" if self.ignore_prior else "posterior"
    self.log.info("-log(%s) minimized to %g", target, -logp_min)
    # Re-evaluate, bypassing the cache, to check the reported minimum
    post = self.model.logposterior(x_min, cached=False)
    if self.ignore_prior:
        recomputed_logp_min = sum(post.loglikes)
    else:
        recomputed_logp_min = post.logpost
    if not np.allclose(logp_min, recomputed_logp_min, atol=1e-2):
        raise LoggedError(
            self.log,
            "Cannot reproduce log minimum to within 0.01. Maybe your "
            "likelihood is stochastic or large numerical error? "
            "Recomputed min: %g (was %g) at %r",
            recomputed_logp_min, logp_min, x_min)
    self.minimum = OnePoint(
        self.model, self.output, name="",
        extension=get_collection_extension(self.ignore_prior))
    self.minimum.add(x_min, derived=post.derived, logpost=post.logpost,
                     logpriors=post.logpriors, loglikes=post.loglikes)
    self.log.info("Parameter values at minimum:\n%s",
                  self.minimum.data.to_string())
    self.minimum.out_update()
    self.dump_getdist()
def initialize(self):
    """
    Initializes the sampler: prepares the chain collection and the proposal
    distribution.

    Steps, in order: converts the step-count options with ``read_dnumber``;
    refuses to resume with a different number of chains; when starting
    afresh with output, deletes files from previous runs; creates the
    per-process collection and current point; configures oversampling or
    dragging; builds the ``BlockedProposer`` and sets its covariance matrix
    (loaded from the checkpoint when resuming); resolves the callback
    function, if any.

    :raises HandledException: if resuming with a different number of chains,
        if both oversampling and dragging are requested, or if dragging is
        requested but all speeds are equal.
    """
    self.log.debug("Initializing")
    for p in ["burn_in", "max_tries", "output_every", "check_every",
              "callback_every"]:
        setattr(self, p,
                read_dnumber(getattr(self, p), self.model.prior.d(),
                             dtype=int))
    if self.callback_every is None:
        self.callback_every = self.check_every
    # Burning-in countdown -- the +1 accounts for the initial point
    # (always accepted)
    self.burn_in_left = self.burn_in + 1
    # Max # checkpoints to wait, in case one process dies without sending
    # MPI_ABORT
    self.been_waiting = 0
    self.max_waiting = max(50, self.max_tries / self.model.prior.d())
    if self.resuming:
        # Computed once and None-safe: the stored size may be None, and the
        # values are reused when formatting the error message.
        n_chains_before = max(self.mpi_size or 0, 1)
        n_chains_now = max(get_mpi_size() or 0, 1)
        if n_chains_before != n_chains_now:
            self.log.error(
                "Cannot resume a sample with a different number of chains: "
                "was %d and now is %d.", n_chains_before, n_chains_now)
            raise HandledException
    if not self.resuming and self.output:
        # Delete previous files (if not "forced", the run would have already
        # failed)
        if (os.path.abspath(self.covmat_filename()) !=
                os.path.abspath(str(self.covmat))):
            try:
                os.remove(self.covmat_filename())
            except OSError:
                # Best effort: the file may simply not exist
                pass
        # There may be more chains than expected, if #ranks was bigger in a
        # previous run: delete consecutive collections until one is missing
        i = 0
        while True:
            i += 1
            collection_filename, _ = self.output.prepare_collection(str(i))
            try:
                os.remove(collection_filename)
            except OSError:
                break
    # One collection per MPI process: `name` is the MPI rank + 1
    rank = get_mpi_rank()
    name = str(1 + (rank if rank is not None else 0))
    self.collection = Collection(self.model, self.output, name=name,
                                 resuming=self.resuming)
    self.current_point = OnePoint(self.model, OutputDummy({}), name=name)
    # Use standard MH steps by default
    self.get_new_sample = self.get_new_sample_metropolis
    # Prepare oversampling / dragging if applicable
    self.effective_max_samples = self.max_samples
    if self.oversample and self.drag:
        self.log.error("Choose either oversampling or dragging, not both.")
        raise HandledException
    if self.oversample:
        factors, blocks = self.model.likelihood._speeds_of_params(
            int_speeds=True)
        self.oversampling_factors = factors
        self.log.info("Oversampling with factors:\n" + "\n".join([
            " %d : %r" % (f, b)
            for f, b in zip(self.oversampling_factors, blocks)]))
        self.i_last_slow_block = None
        # No way right now to separate slow and fast
        slow_params = list(self.model.parameterization.sampled_params())
    elif self.drag:
        speeds, blocks = self.model.likelihood._speeds_of_params(
            fast_slow=True, int_speeds=True)
        # For now, no blocking inside either fast or slow: just 2 blocks
        self.i_last_slow_block = 0
        if np.all(speeds == speeds[0]):
            self.log.error(
                "All speeds are equal or too similar: cannot drag! "
                "Make sure to define accurate likelihoods' speeds.")
            raise HandledException
        # Make the 1st factor 1:
        speeds = [1, speeds[1] / speeds[0]]
        # Target: dragging step taking as long as slow step. Per dragging
        # step, the (fast) posterior is evaluated *twice*, hence the / 2.
        self.drag_interp_steps = int(np.round(self.drag * speeds[1] / 2))
        fast_params = list(chain(*blocks[1 + self.i_last_slow_block:]))
        # Not too much or too little dragging
        drag_limits = [
            (int(limit) * len(fast_params) if limit is not None else limit)
            for limit in self.drag_limits]
        if (drag_limits[0] is not None and
                self.drag_interp_steps < drag_limits[0]):
            self.log.warning(
                "Number of dragging steps clipped from below: was not "
                "enough to efficiently explore the fast directions -- "
                "avoid this limit by decreasing 'drag_limits[0]'.")
            self.drag_interp_steps = drag_limits[0]
        if (drag_limits[1] is not None and
                self.drag_interp_steps > drag_limits[1]):
            self.log.warning(
                "Number of dragging steps clipped from above: "
                "excessive, probably inefficient, exploration of the "
                "fast directions -- "
                "avoid this limit by increasing 'drag_limits[1]'.")
            self.drag_interp_steps = drag_limits[1]
        # Re-scale steps between checkpoint and callback to the slow
        # dimensions only
        slow_params = list(chain(*blocks[:1 + self.i_last_slow_block]))
        self.n_slow = len(slow_params)
        for p in ["check_every", "callback_every"]:
            setattr(self, p,
                    int(getattr(self, p) * self.n_slow /
                        self.model.prior.d()))
        self.log.info("Dragging with oversampling per step:\n" + "\n".join([
            " %d : %r" % (f, b)
            for f, b in zip([1, self.drag_interp_steps],
                            [blocks[0], fast_params])]))
        self.get_new_sample = self.get_new_sample_dragging
    else:
        _, blocks = self.model.likelihood._speeds_of_params()
        self.oversampling_factors = [1 for _ in blocks]
        slow_params = list(self.model.parameterization.sampled_params())
        self.n_slow = len(slow_params)
    # Turn parameter names into indices (the name list is built once, not
    # once per looked-up parameter)
    sampled_names = list(self.model.parameterization.sampled_params())
    self.blocks = [[sampled_names.index(p) for p in b] for b in blocks]
    # Not every branch above assigns both attributes (dragging sets no
    # oversampling factors, the default branch sets no last slow block), so
    # fall back to None rather than fail if they are not defined elsewhere.
    self.proposer = BlockedProposer(
        self.blocks,
        oversampling_factors=getattr(self, "oversampling_factors", None),
        i_last_slow_block=getattr(self, "i_last_slow_block", None),
        proposal_scale=self.proposal_scale)
    # Build the initial covariance matrix of the proposal, or load from
    # checkpoint
    if self.resuming:
        covmat = np.loadtxt(self.covmat_filename())
        self.log.info("Covariance matrix from checkpoint.")
    else:
        covmat = self.initial_proposal_covmat(slow_params=slow_params)
        self.log.info("Initial covariance matrix.")
    self.log.debug(
        "Sampling with covmat:\n%s",
        DataFrame(
            covmat,
            columns=self.model.parameterization.sampled_params(),
            index=self.model.parameterization.sampled_params()).to_string(
                line_width=_line_width))
    self.proposer.set_covariance(covmat)
    # Prepare callback function
    if self.callback_function is not None:
        self.callback_function_callable = (
            get_external_function(self.callback_function))
def initialise(self):
    """
    Initialises the sampler: prepares the chain collection and the proposal
    distribution.

    Creates the per-process collection and current point, selects the
    sampling step (Metropolis by default, dragging if requested), builds the
    ``BlockedProposer`` with the initial covariance matrix and resolves the
    callback function, if any.

    :raises HandledException: if both oversampling and dragging are
        requested; if oversampling is requested (work in progress); if
        dragging is requested without a theory code, or with all speeds
        equal.
    """
    self.log.info("Initializing")
    # Burning-in countdown -- the +1 accounts for the initial point
    # (always accepted)
    self.burn_in_left = self.burn_in + 1
    # One collection per MPI process: `name` is the MPI rank + 1
    rank = get_mpi_rank()
    name = str(1 + (rank if rank is not None else 0))
    self.collection = Collection(self.parametrization, self.likelihood,
                                 self.output, name=name)
    self.current_point = OnePoint(self.parametrization, self.likelihood,
                                  self.output, name=name)
    # Use the standard steps by default
    self.get_new_sample = self.get_new_sample_metropolis
    # Prepare oversampling / fast-dragging if applicable
    self.effective_max_samples = self.max_samples
    if self.oversample and self.drag:
        self.log.error(
            "Choose either oversampling or fast-dragging, not both.")
        raise HandledException
    # NOTE: a check that block speeds differ enough for oversampling is not
    # enforced yet.
    if self.oversample:
        factors, blocks = self.likelihood.speeds_of_params(
            oversampling_factors=True)
        self.oversampling_factors = factors
        # WIP: actually, we would have to re-normalise to the dimension of
        # the blocks.
        self.log.info("Oversampling with factors:\n" + "\n".join([
            " %d : %r" % (f, b)
            for f, b in zip(self.oversampling_factors, blocks)]))
        # WIP: useless until likelihoods have STATES!
        self.log.error("Sorry, oversampling is WIP")
        raise HandledException
    elif self.drag:
        # WIP: for now, can only separate between theory and likelihoods
        # until likelihoods have states
        if not self.likelihood.theory:
            self.log.error(
                "WIP: dragging disabled for now when no theory code present.")
            raise HandledException
        # NOTE: a user-defined `max_speed_slow` threshold is not supported
        # yet.
        speeds, blocks = self.likelihood.speeds_of_params(int_speeds=True,
                                                          fast_slow=True)
        if np.all(speeds == speeds[0]):
            self.log.error(
                "All speeds are equal: cannot drag! Make sure to define, "
                "especially, the speed of the fastest likelihoods.")
            # Abort like every other error here, instead of carrying on with
            # a dragging setup that the message says is impossible.
            raise HandledException
        self.i_last_slow_block = 0  # just theory can be slow for now
        fast_params = list(chain(*blocks[1 + self.i_last_slow_block:]))
        self.n_slow = sum(
            len(b) for b in blocks[:1 + self.i_last_slow_block])
        self.drag_interp_steps = int(
            self.drag * np.round(min(speeds[1:]) / speeds[0]))
        self.log.info("Dragging with oversampling per step:\n" + "\n".join([
            " %d : %r" % (f, b)
            for f, b in zip([1, self.drag_interp_steps],
                            [blocks[0], fast_params])]))
        self.get_new_sample = self.get_new_sample_dragging
    else:
        _, blocks = self.likelihood.speeds_of_params()
        self.oversampling_factors = [1 for _ in blocks]
        self.n_slow = len(self.parametrization.sampled_params())
    # Turn parameter names into indices (the name list is built once, not
    # once per looked-up parameter)
    sampled_names = list(self.parametrization.sampled_params().keys())
    blocks = [[sampled_names.index(p) for p in b] for b in blocks]
    # Not every branch above assigns both attributes, hence the fallbacks
    self.proposer = BlockedProposer(
        blocks,
        oversampling_factors=getattr(self, "oversampling_factors", None),
        i_last_slow_block=getattr(self, "i_last_slow_block", None),
        propose_scale=self.propose_scale)
    # Build the initial covariance matrix of the proposal
    covmat = self.initial_proposal_covmat()
    self.log.info("Sampling with covariance matrix:")
    self.log.info("%r", covmat)
    self.proposer.set_covariance(covmat)
    # Prepare callback function
    if self.callback_function is not None:
        self.callback_function_callable = (
            get_external_function(self.callback_function))