def _check_mod_set(self, mod_set, name):
    """
    Validates the provided set of model outputs `mod_set` against this
    :obj:`~ModelLink` instance.

    Parameters
    ----------
    mod_set : 1D or 2D array_like or dict
        Model output (set) to validate in this :obj:`~ModelLink` instance.
    name : str
        The name of the model output (set), which is used in the error
        message if the validation fails.

    Returns
    -------
    mod_set : 1D or 2D :obj:`~numpy.ndarray` object
        The provided `mod_set` if the validation was successful. If
        `mod_set` was a dict, it will be converted to a
        :obj:`~numpy.ndarray` object (sorted on :attr:`~data_idx`).

    """

    # Obtain the logger and announce the validation
    logger = getCLogger('CHECK')
    logger.info("Validating provided set of model outputs %r." % (name))

    # A dict is flattened into an array ordered on the known data identifiers
    if isinstance(mod_set, dict):
        try:
            ordered_vals = [mod_set[idx] for idx in self._data_idx]
        except KeyError as error:
            err_msg = ("Input argument %r is missing data identifier '%r'!"
                       % (name, error.args[0]))
            raise_error(err_msg, KeyError, logger)
        else:
            mod_set = np_array(ordered_vals).T

    # From here on, always work with a NumPy array
    mod_set = np_array(mod_set)

    # Only one- and two-dimensional inputs are accepted
    if mod_set.ndim not in (1, 2):
        err_msg = ("Input argument %r is not one-dimensional or "
                   "two-dimensional!" % (name))
        raise_error(err_msg, ShapeError, logger)

    # The last axis must hold exactly n_data data values
    n_vals = mod_set.shape[-1]
    if n_vals != self._n_data:
        err_msg = ("Input argument %r has incorrect number of data values "
                   "(%i != %i)!" % (name, n_vals, self._n_data))
        raise_error(err_msg, ShapeError, logger)

    # All entries must be floats
    mod_set = check_vals(mod_set, name, 'float')

    # Report success and hand back the validated array
    logger.info("Finished validating provided set of model outputs %r."
                % (name))
    return (mod_set)
def advance_chain(sam_set):
    """
    Returns a bool array stating for every sample in `sam_set` whether it
    should be accepted: it must lie within parameter space and be flagged
    as plausible by the emulator evaluation.

    """

    # Force the proposed samples into a 2D array
    sam_set = np_array(sam_set, ndmin=2)

    # Flag all samples that lie within the parameter boundaries
    bounds = pipe._modellink._par_rng
    above_lower = bounds[:, 0] <= sam_set
    below_upper = sam_set <= bounds[:, 1]
    accept = (above_lower & below_upper).all(1)

    # Only the in-bounds samples are evaluated; their plausibility flags
    # overwrite the corresponding entries in accept
    emul_i = pipe._emulator._emul_i
    in_bounds = sam_set[accept]
    results = pipe._make_call('_evaluate_sam_set', emul_i, in_bounds,
                              'project')
    accept[accept] = results[0]

    # Return the acceptance mask
    return (accept)
def _check_md_var(self, md_var, name):
    """
    Checks validity of provided set of model discrepancy variances
    `md_var` in this :obj:`~ModelLink` instance.

    Parameters
    ----------
    md_var : 1D or 2D array_like or dict
        Model discrepancy variance set to validate in this
        :obj:`~ModelLink` instance.
    name : str
        The name of the model discrepancy set, which is used in the error
        message if the validation fails.

    Returns
    -------
    md_var : 2D :obj:`~numpy.ndarray` object
        The (converted) provided `md_var` if the validation was successful.
        If `md_var` was a dict, it will be converted to a
        :obj:`~numpy.ndarray` object. A 1D input is duplicated into two
        identical columns.

    Raises
    ------
    ShapeError
        If `md_var` is not 1D or 2D, does not contain `n_data` entries
        along its first axis, or is 2D with a second axis that is not of
        length two.

    """

    # Make logger
    logger = getCLogger('CHECK')
    logger.info("Validating provided set of model discrepancy variances "
                "%r." % (name))

    # If md_var is a dict, convert it to a NumPy array
    # NOTE(review): the values are taken in the dict's own iteration order,
    # not sorted on data_idx like _check_mod_set does - confirm that callers
    # always provide the dict in data_idx order.
    if isinstance(md_var, dict):
        md_var = np_array(list(md_var.values()))

    # Make sure that md_var is a NumPy array
    md_var = np_array(md_var)

    # Raise error if md_var is not 1D or 2D
    if md_var.ndim not in (1, 2):
        err_msg = ("Input argument %r is not one-dimensional or "
                   "two-dimensional!" % (name))
        raise_error(err_msg, ShapeError, logger)

    # Check if md_var contains n_data values
    if md_var.shape[0] != self._n_data:
        err_msg = ("Received array of model discrepancy variances %r has "
                   "incorrect number of data points (%i != %i)!"
                   % (name, md_var.shape[0], self._n_data))
        # Go through raise_error so the failure is logged like all others
        raise_error(err_msg, ShapeError, logger)

    # Check if single or dual values were given
    if md_var.ndim == 1:
        # A single variance per data point is used for both columns
        md_var = np_array([md_var] * 2).T
    elif md_var.shape[1] != 2:
        err_msg = ("Received array of model discrepancy variances %r has "
                   "incorrect number of values (%i != 2)!"
                   % (name, md_var.shape[1]))
        raise_error(err_msg, ShapeError, logger)

    # Check if all values are non-negative floats, reporting any failure
    # under the name that the caller provided
    md_var = check_vals(md_var, name, 'nneg', 'float')

    # Log again and return md_var
    logger.info("Finished validating provided set of model discrepancy "
                "variances %r." % (name))
    return (md_var)
def _check_sam_set(self, sam_set, name):
    """
    Validates the provided set of model parameter samples `sam_set` against
    this :obj:`~ModelLink` instance.

    Parameters
    ----------
    sam_set : 1D or 2D array_like or dict
        Parameter/sample set to validate in this :obj:`~ModelLink`
        instance.
    name : str
        The name of the parameter/sample set, which is used in the error
        message if the validation fails.

    Returns
    -------
    sam_set : 1D or 2D :obj:`~numpy.ndarray` object
        The provided `sam_set` if the validation was successful. If
        `sam_set` was a dict, it will be converted to a
        :obj:`~numpy.ndarray` object.

    """

    # Obtain the logger and announce the validation
    logger = getCLogger('CHECK')
    logger.info("Validating provided set of model parameter samples %r."
                % (name))

    # A dict is sorted on its keys and its values are stacked into an array
    if isinstance(sam_set, dict):
        sorted_sams = sdict(sam_set)
        sam_set = np_array(sorted_sams.values()).T

    # From here on, always work with a NumPy array
    sam_set = np_array(sam_set)

    # Only one- and two-dimensional inputs are accepted
    if sam_set.ndim not in (1, 2):
        err_msg = ("Input argument %r is not one-dimensional or "
                   "two-dimensional!" % (name))
        raise_error(err_msg, ShapeError, logger)

    # The last axis must hold exactly n_par parameter values
    n_par_given = sam_set.shape[-1]
    if n_par_given != self._n_par:
        err_msg = ("Input argument %r has incorrect number of parameters "
                   "(%i != %i)!" % (name, n_par_given, self._n_par))
        raise_error(err_msg, ShapeError, logger)

    # All entries must be floats
    sam_set = check_vals(sam_set, name, 'float')

    # Determine for every sample whether it lies within parameter space
    bounds = self._par_rng

    def _in_par_space(sample):
        return ((bounds[:, 0] <= sample) * (sample <= bounds[:, 1])).all()

    inside = np.apply_along_axis(_in_par_space, 1,
                                 np_array(sam_set, ndmin=2))

    # Report the first sample (if any) that falls outside parameter space
    outside_idx = np.argwhere(~inside)
    if len(outside_idx):
        index = outside_idx[0]
        err_msg = ("Input argument '%s%s' is outside parameter space!"
                   % (name, index if sam_set.ndim != 1 else ''))
        raise_error(err_msg, ValueError, logger)

    # Report success and hand back the validated array
    logger.info("Finished validating provided set of model parameter "
                "samples %r." % (name))
    return (sam_set)
def convert_data(model_data):
    """
    Converts the provided `model_data` into a full data dict, taking into
    account all formatting options, and returns it.

    This function can be used externally to check how the provided
    `model_data` would be interpreted when provided to the
    :class:`~prism.modellink.ModelLink` subclass. Its output can be used
    for the 'model_data' input argument.

    Parameters
    ----------
    model_data : array_like, dict or str
        Anything that can be converted to a dict that provides model data
        information. If str, it is treated as the path to a data file.

    Returns
    -------
    data_dict : dict
        Dict with the provided `model_data` converted to its full format.
        Every value is a list ``[val, lower_err, upper_err, spc]``.

    Raises
    ------
    TypeError
        If `model_data` cannot be converted to a dict.
    InputError
        If a data point has no identifier, or does not provide at least a
        data value and a data error.
    ValueError
        If a data point specifies an unknown data space.

    """

    # If a data file is given
    if isinstance(model_data, str):
        # Obtain absolute path to given file
        data_file = path.abspath(model_data)

        # Read the data file in as a string
        data_points = np.genfromtxt(data_file, dtype=(str), delimiter=':',
                                    autostrip=True)

        # Make sure that data_points is 2D
        data_points = np_array(data_points, ndmin=2)

        # Convert read-in data to dict
        model_data = dict(data_points)

    # If a data dict is given
    elif isinstance(model_data, dict):
        model_data = dict(model_data)

    # If anything else is given
    else:
        # Check if it can be converted to a dict
        try:
            model_data = dict(model_data)
        except Exception as error:
            raise TypeError("Input model data cannot be converted to type "
                            "'dict'!") from error

    # Make empty data_dict
    data_dict = dict()

    # Loop over all items in model_data
    for key, value in model_data.items():
        # Convert key to an actual data_idx
        idx = e13.split_seq(key)

        # Check if idx is not empty
        if not idx:
            raise e13.InputError("Model data contains a data point with no "
                                 "identifier!")

        # Convert value to an actual data point
        data = e13.split_seq(value)

        # Every data point needs at least a value and an error; without
        # this check, shorter inputs would fail below with an IndexError
        if len(data) < 2:
            raise e13.InputError("Model data point %r must provide at least "
                                 "a data value and a data error!" % (idx,))

        # Check if provided data value is valid
        val = check_vals(data[0], 'data_val%s' % (idx), 'float')

        # Extract data error and space
        # If length is two, centered error and no data space were given
        if len(data) == 2:
            err = [check_vals(data[1], 'data_err%s' % (idx), 'float',
                              'pos')] * 2
            spc = 'lin'

        # If length is three, there are two possibilities
        elif len(data) == 3:
            # If the third column contains a string, it is the data space
            if isinstance(data[2], str):
                err = [check_vals(data[1], 'data_err%s' % (idx), 'float',
                                  'pos')] * 2
                spc = data[2]

            # If the third column contains no string, it is error interval
            else:
                err = check_vals(data[1:3], 'data_err%s' % (idx), 'float',
                                 'pos')
                spc = 'lin'

        # If length is four+, error interval and data space were given
        else:
            err = check_vals(data[1:3], 'data_err%s' % (idx), 'float', 'pos')
            spc = data[3]

        # Check if valid data space has been provided (quotes are stripped
        # such that quoted strings read from file are accepted as well)
        spc = str(spc).replace("'", '').replace('"', '')
        if spc.lower() in ('lin', 'linear'):
            spc = 'lin'
        elif spc.lower() in ('log', 'log10', 'log_10'):
            spc = 'log10'
        elif spc.lower() in ('ln', 'loge', 'log_e'):
            spc = 'ln'
        else:
            raise ValueError("Input argument 'data_spc%s' is invalid (%r)!"
                             % (idx, spc))

        # Save data identifier as tuple or single element
        if len(idx) == 1:
            idx = idx[0]
        else:
            idx = tuple(idx)

        # Add entire data point to data_dict
        data_dict[idx] = [val, *err, spc]

    # Return data_dict
    return (data_dict)
def convert_parameters(model_parameters):
    """
    Converts the provided `model_parameters` into a full parameters dict,
    taking into account all formatting options, and returns it.

    This function can be used externally to check how the provided
    `model_parameters` would be interpreted when provided to the
    :class:`~prism.modellink.ModelLink` subclass. Its output can be used
    for the 'model_parameters' input argument.

    Parameters
    ----------
    model_parameters : array_like, dict or str
        Anything that can be converted to a dict that provides model
        parameters information. If str, it is treated as the path to a
        parameter file.

    Returns
    -------
    par_dict : dict
        Dict with the provided `model_parameters` converted to its full
        format. Every value is a list ``[lower, upper, estimate]``, where
        the estimate is *None* if it was not provided.

    Raises
    ------
    TypeError
        If `model_parameters` cannot be converted to a dict.
    InputError
        If a parameter does not provide both a lower and an upper bound.
    ValueError
        If the lower bound is not below the upper bound, or if the
        estimate lies outside of the parameter range.

    """

    # If a parameter file is given
    if isinstance(model_parameters, str):
        # Obtain absolute path to given file
        par_file = path.abspath(model_parameters)

        # Read the parameter file in as a string
        pars = np.genfromtxt(par_file, dtype=(str), delimiter=':',
                             autostrip=True)

        # Make sure that pars is 2D
        pars = np_array(pars, ndmin=2)

        # Convert read-in parameters to dict
        model_parameters = sdict(pars)

    # If a parameter dict is given
    elif isinstance(model_parameters, dict):
        model_parameters = sdict(model_parameters)

    # If anything else is given
    else:
        # Check if it can be converted to a dict
        try:
            model_parameters = sdict(model_parameters)
        except Exception as error:
            raise TypeError("Input model parameters cannot be converted to"
                            " type 'dict'!") from error

    # Initialize empty par_dict
    par_dict = sdict()

    # Loop over all items in model_parameters
    for name, values_str in model_parameters.items():
        # Convert values_str to values
        values = e13.split_seq(values_str)

        # Check if provided name is a string
        name = check_vals(name, 'par_name[%r]' % (name), 'str')

        # Both range bounds are mandatory; without this check, shorter
        # inputs would fail below with an IndexError
        if len(values) < 2:
            raise e13.InputError("Input argument 'par_rng[%r]' must provide "
                                 "both a lower and an upper bound!" % (name))

        # Check if provided range consists of two floats
        par_rng = check_vals(values[:2], 'par_rng[%r]' % (name), 'float')

        # Check if provided lower bound is lower than the upper bound
        if par_rng[0] >= par_rng[1]:
            raise ValueError("Input argument 'par_rng[%r]' does not define a "
                             "valid parameter range (%f !< %f)!"
                             % (name, par_rng[0], par_rng[1]))

        # Check if a float parameter estimate was provided
        try:
            est = check_vals(values[2], 'par_est[%r]' % (name), 'float')
        # If no estimate was provided, save it as None
        except IndexError:
            est = None
        # If no float was provided, check if it was None
        except TypeError:
            # If it is None, save it as such
            if str(values[2]).lower() == 'none':
                est = None
            # If it is not None, reraise the previous error
            else:
                raise
        # If a float was provided, check if it is within parameter range
        else:
            # Compare against the validated range instead of the raw input
            if not (par_rng[0] <= est <= par_rng[1]):
                raise ValueError("Input argument 'par_est[%r]' is outside "
                                 "of defined parameter range!" % (name))

        # Add parameter to par_dict
        par_dict[name] = [*par_rng, est]

    # Return par_dict
    return (par_dict)
def get_walkers(pipeline_obj, *, emul_i=None, init_walkers=None,
                req_n_walkers=None, unit_space=False, lnpost_fn=None,
                **kwargs):
    """
    Analyzes proposed `init_walkers` and returns plausible `p0_walkers`.

    Analyzes sample set `init_walkers` in the provided `pipeline_obj` at
    iteration `emul_i` and returns all samples that are plausible to be used
    as starting positions for MCMC walkers. The provided samples and returned
    walkers should be/are given in unit space if `unit_space` is *True*.

    If `init_walkers` is *None*, returns :attr:`~prism.Pipeline.impl_sam`
    instead if it is available.

    This function needs to be called by all MPI ranks.

    Parameters
    ----------
    pipeline_obj : :obj:`~prism.Pipeline` object
        The instance of the :class:`~prism.Pipeline` class that needs to be
        used for determining the plausibility of the proposed starting
        positions.

    Optional
    --------
    %(emul_i)s
    init_walkers : 2D array_like, dict, int or None. Default: None
        Sample set of proposed initial MCMC walker positions. All plausible
        samples in `init_walkers` will be returned.
        If int, generate an LHD of provided size and return all plausible
        samples.
        If *None*, return :attr:`~prism.Pipeline.impl_sam` corresponding to
        iteration `emul_i` instead.
    req_n_walkers : int or None. Default: None
        The minimum required number of plausible starting positions that
        should be returned. If *None*, all plausible starting positions in
        `init_walkers` are returned instead.

        .. versionadded:: 1.2.0
    unit_space : bool. Default: False
        Bool determining whether or not the provided samples and returned
        walkers are given in unit space.
    lnpost_fn : function or None. Default: None
        If function, call :func:`~get_hybrid_lnpost_fn` using `lnpost_fn` and
        the same values for `pipeline_obj`, `emul_i` and `unit_space`, and
        return the resulting function definition `hybrid_lnpost()`. Any
        additionally provided `kwargs` are also passed to it.

    Returns
    -------
    n_walkers : int
        Number of returned MCMC walkers. Note that this number can be higher
        than `req_n_walkers` if not *None*.
    p0_walkers : 2D :obj:`~numpy.ndarray` object or dict
        Array containing plausible starting positions of valid MCMC walkers.
        If `init_walkers` was provided as a dict, `p0_walkers` will be a dict.
    hybrid_lnpost : function (if `lnpost_fn` is a function)
        The function returned by :func:`~get_hybrid_lnpost_fn` using
        `lnpost_fn`, `pipeline_obj`, `emul_i`, `unit_space` and `kwargs` as the
        input values.

    Raises
    ------
    TypeError
        If `pipeline_obj` is not an instance of the Pipeline class.
    InputError
        If `pipeline_obj` does not use a default emulator, if `lnpost_fn` is
        rejected by :func:`~get_hybrid_lnpost_fn`, or if no plausible samples
        remain.
    RequestError
        If `init_walkers` is *None* and iteration `emul_i` has not been
        analyzed yet.

    See also
    --------
    :func:`~get_hybrid_lnpost_fn`
        Returns a function definition ``hybrid_lnpost(par_set, *args,
        **kwargs)``.

    :attr:`~prism.Pipeline.worker_mode`
        Special context manager within which all code is executed in worker
        mode.

    Notes
    -----
    If `init_walkers` is *None* and emulator iteration `emul_i` has not been
    analyzed yet, a :class:`~prism._internal.RequestError` will be raised.

    If `req_n_walkers` is not *None*, a custom Metropolis-Hastings sampling
    algorithm is used to generate the required number of starting positions.
    All plausible samples in `init_walkers` are used as the start of every MCMC
    chain. Note that if the number of plausible samples in `init_walkers` is
    small, it is possible that the returned `p0_walkers` are not spread out
    properly over parameter space.

    """

    # Make abbreviation for pipeline_obj
    pipe = pipeline_obj

    # Check if provided pipeline_obj is an instance of the Pipeline class
    if not isinstance(pipe, Pipeline):
        raise TypeError("Input argument 'pipeline_obj' must be an instance of "
                        "the Pipeline class!")

    # Check if the provided pipeline_obj uses a default emulator
    if (pipe._emulator._emul_type != 'default'):
        raise e13.InputError("Input argument 'pipeline_obj' does not use a "
                             "default emulator!")

    # Get emulator iteration
    emul_i = pipe._emulator._get_emul_i(emul_i)

    # If req_n_walkers is not None, check if it is an integer
    if req_n_walkers is not None:
        req_n_walkers = check_vals(req_n_walkers, 'req_n_walkers', 'int',
                                   'pos')

    # Check if unit_space is a bool
    unit_space = check_vals(unit_space, 'unit_space', 'bool')

    # Assume that walkers are not to be returned as a dict
    walker_dict = False

    # Check if lnpost_fn is None and try to get hybrid_lnpost function if not
    # This is done before any samples are evaluated, such that an invalid
    # lnpost_fn is reported as early as possible
    if lnpost_fn is not None:
        try:
            hybrid_lnpost =\
                get_hybrid_lnpost_fn(lnpost_fn, pipe, emul_i=emul_i,
                                     unit_space=unit_space, **kwargs)
        except e13.InputError:
            raise e13.InputError("Input argument 'lnpost_fn' is invalid!")

    # If init_walkers is None, use impl_sam of emul_i
    if init_walkers is None:
        # Controller checking if emul_i has already been analyzed
        if pipe._is_controller:
            # If iteration has not been analyzed, raise error
            if not pipe._n_eval_sam[emul_i]:
                raise RequestError("Emulator iteration %i has not been "
                                   "analyzed yet!" % (emul_i))
            # If iteration is last iteration, init_walkers is current impl_sam
            elif (emul_i == pipe._emulator._emul_i):
                init_walkers = pipe._impl_sam
            # If iteration is not last, init_walkers is previous impl_sam
            else:
                init_walkers = pipe._emulator._sam_set[emul_i + 1]

            # Make sure to make a copy of init_walkers to avoid modifications
            init_walkers = init_walkers.copy()

        # Broadcast init_walkers to workers as p0_walkers
        # These samples are used directly, without being analyzed again
        p0_walkers = pipe._comm.bcast(init_walkers, 0)

    # If init_walkers is not None, use provided samples or LHD size
    else:
        # Controller checking if init_walkers is valid
        if pipe._is_controller:
            # If init_walkers is an int, create LHD of provided size
            if isinstance(init_walkers, int):
                # Check if provided integer is positive
                n_sam = check_vals(init_walkers, 'init_walkers', 'pos')

                # Obtain the par_space to sample in
                par_space = pipe._get_impl_space(emul_i)

                # If par_space is None, use the corresponding emul_space
                if par_space is None:
                    par_space = pipe._emulator._emul_space[emul_i]

                # Create LHD of provided size
                init_walkers = e13.lhd(n_sam, pipe._modellink._n_par,
                                       par_space, 'center', pipe._criterion,
                                       100)

            # If init_walkers is not an int, it must be array_like or dict
            else:
                # If init_walkers is provided as a dict, convert it
                if isinstance(init_walkers, dict):
                    # Make sure that init_walkers is a SortedDict
                    init_walkers = sdict(init_walkers)

                    # Convert it to normal
                    init_walkers = np_array(init_walkers.values()).T

                    # Return p0_walkers as a dict
                    # NOTE(review): this flag is only set within the
                    # controller branch - confirm that workers are not
                    # expected to return a dict as well
                    walker_dict = True

                # Make sure that init_walkers is a NumPy array
                init_walkers = np_array(init_walkers, ndmin=2)

                # If unit_space is True, convert init_walkers to par_space
                if unit_space:
                    init_walkers = pipe._modellink._to_par_space(init_walkers)

                # Check if init_walkers is valid
                init_walkers = pipe._modellink._check_sam_set(
                    init_walkers, 'init_walkers')

        # Broadcast init_walkers to workers
        init_walkers = pipe._comm.bcast(init_walkers, 0)

        # Analyze init_walkers and save them as p0_walkers
        p0_walkers = pipe._evaluate_sam_set(emul_i, init_walkers, 'analyze')

    # Check if init_walkers is not empty and raise error if it is
    if not p0_walkers.shape[0]:
        raise e13.InputError("Input argument 'init_walkers' contains no "
                             "plausible samples!")

    # If req_n_walkers is not None, use MH MCMC to find all required walkers
    if req_n_walkers is not None:
        n_walkers, p0_walkers = _do_mh_walkers(pipe, p0_walkers,
                                               req_n_walkers)
    # Otherwise, return every distinct plausible sample exactly once
    else:
        p0_walkers = np.unique(p0_walkers, axis=0)
        n_walkers = p0_walkers.shape[0]

    # Check if p0_walkers needs to be converted
    if unit_space:
        p0_walkers = pipe._modellink._to_unit_space(p0_walkers)

    # Check if p0_walkers needs to be returned as a dict
    if walker_dict:
        p0_walkers = pipe._modellink._get_sam_dict(p0_walkers)

    # Check if hybrid_lnpost was requested and return it as well if so
    if lnpost_fn is not None:
        return (n_walkers, p0_walkers, hybrid_lnpost)
    else:
        return (n_walkers, p0_walkers)
def hybrid_lnpost(par_set, *args, **kwargs):
    """
    Calculates the natural logarithm of the posterior probability of
    `par_set` using the provided function `lnpost_fn`, in addition to
    constraining it first with the emulator defined in the `pipeline_obj`.

    This function needs to be called by all MPI ranks unless called within
    the :attr:`~prism.Pipeline.worker_mode` context manager.

    Parameters
    ----------
    par_set : 1D array_like or dict
        Sample to calculate the posterior probability for. This sample is
        first analyzed in `pipeline_obj` and only given to `lnpost_fn` if
        it is plausible. If `par_dict` is *True*, this is a dict.
    args : positional arguments
        Positional arguments that need to be passed to `lnpost_fn`.
    kwargs : keyword arguments
        Keyword arguments that need to be passed to `lnpost_fn`.

    Returns
    -------
    lnp : float
        The natural logarithm of the posterior probability of `par_set`, as
        determined by `lnpost_fn` if `par_set` is plausible. If
        `impl_prior` is *True*, `lnp` is calculated as `lnprior` +
        `lnpost_fn()`, with `lnprior` the natural logarithm of the first
        implausibility cut-off value of `par_set` scaled with its maximum.
        If `par_set` lies outside of parameter space or is not plausible,
        negative infinity is returned.

    """

    # If par_dict is True, convert par_set to a NumPy array
    if par_dict:
        sam = np_array(sdict(par_set).values(), ndmin=2)
    else:
        sam = np_array(par_set, ndmin=2)

    # If unit_space is True, convert par_set to par_space
    if unit_space:
        sam = pipe._modellink._to_par_space(sam)

    # Check if par_set is within parameter space and return -inf if not
    # Note: np.inf is used as the np.infty alias was removed in NumPy 2.0
    par_rng = pipe._modellink._par_rng
    if not ((par_rng[:, 0] <= sam[0]) * (sam[0] <= par_rng[:, 1])).all():
        return (-np.inf)

    # Check what sampling is requested and analyze par_set
    if impl_prior:
        impl_sam, lnprior = pipe._make_call('_evaluate_sam_set', emul_i,
                                            sam, 'hybrid')
    else:
        impl_sam = pipe._make_call('_evaluate_sam_set', emul_i, sam,
                                   'analyze')
        lnprior = 0

    # If par_set is plausible, call lnpost_fn with the sample exactly as it
    # was provided by the caller (not the converted copy)
    if len(impl_sam):
        return (lnprior + lnpost_fn(par_set, *args, **kwargs))

    # If par_set is not plausible, return -inf
    else:
        return (-np.inf)
def test_np_array():
    # Passing an existing ndarray must hand back that very same object
    original = np.array([1, 2])
    result = np_array(original)
    assert result is original