def uniform_partition_uniform_distribution_rectangle_size(data_set,
                                                          Q_ref=None,
                                                          rect_size=None,
                                                          M=50,
                                                          num_d_emulate=1E6):
    r"""
    Creates a simple function approximation of :math:`\rho_{\mathcal{D}}`
    where :math:`\rho_{\mathcal{D}}` is a uniform probability density on a
    generalized rectangle centered at ``Q_ref`` or the ``reference_value``
    of a sample set. If ``Q_ref`` is not given the reference value is used.

    The support of this density is defined by ``rect_size``, which
    determines the size of the generalized rectangle.

    The simple function approximation is then defined by determining ``M``
    Voronoi cells (i.e., "bins") partitioning :math:`\mathcal{D}`. These
    bins are only implicitly defined by ``M`` samples in
    :math:`\mathcal{D}`. Finally, the probabilities of each of these bins
    is computed by sampling from :math:`\rho_{\mathcal{D}}` and using
    nearest neighbor searches to bin these samples in the ``M`` implicitly
    defined bins. The result is the simple function approximation denoted
    by :math:`\rho_{\mathcal{D},M}`.

    .. note::

        ``data_set`` is only used to determine dimension.

    Note that all computations in the measure-theoretic framework that
    follow from this are for the fixed simple function approximation
    :math:`\rho_{\mathcal{D},M}`.

    If ``data_set`` is a :class:`~bet.sample.discretization`, the returned
    sample set is also stored as its ``_output_probability_set``.

    :param int M: Defines number M samples in D used to define
        :math:`\rho_{\mathcal{D},M}` The choice of M is something of an
        "art" - play around with it and you can get reasonable results with
        a relatively small number here like 50.
    :param rect_size: Determines the size of the support of the uniform
        distribution on a generalized rectangle. A scalar is used for every
        dimension; a list/array gives one size per dimension.
    :type rect_size: double or list
    :param int num_d_emulate: Number of samples used to emulate using an MC
        assumption
    :param data_set: Sample set that the probability measure is defined
        for.
    :type data_set: :class:`~bet.sample.discretization` or
        :class:`~bet.sample.sample_set` or :class:`~numpy.ndarray`
    :param Q_ref: :math:`Q(\lambda_{reference})`
    :type Q_ref: :class:`~numpy.ndarray` of size (mdim,)

    :raises wrong_argument_type: if ``rect_size`` is ``None`` or contains a
        value that is not strictly positive

    :rtype: :class:`~bet.sample.voronoi_sample_set`
    :returns: sample_set object defining simple function approximation

    """
    (_, dim, _, Q_ref) = check_inputs(data_set, Q_ref)

    if rect_size is None:
        raise wrong_argument_type("Rectangle size required.")
    # Convert to an array up front: a plain list would otherwise fail in
    # ``1.5 * rect_size`` below (a list cannot be multiplied by a float).
    rect_size = np.asarray(rect_size, dtype=np.float64)
    if rect_size.ndim == 0:
        # A scalar size applies to every dimension.
        rect_size = rect_size * np.ones((dim,))
    if np.any(np.less_equal(rect_size, 0)):
        msg = 'rect_size must be greater than 0'
        raise wrong_argument_type(msg)

    # Create M samples defining M Voronoi cells (i.e., "bins") in D used to
    # define the simple function approximation rho_{D,M}.
    #
    # This does not have to be random, but here we assume this to be the
    # case. We can choose these samples deterministically but that fails to
    # scale with dimension efficiently.
    #
    # Note that these M samples are chosen for the sole purpose of
    # determining the bins used to create the approximation to rho_D. We
    # call these M samples "d_distr_samples" because they are samples on
    # the data space and the distr implies these samples are chosen to
    # create the approximation to the probability measure (distribution)
    # on D.
    #
    # Note that we create these samples in a set containing the
    # hyperrectangle (hence the factor 1.5) in order to get output cells
    # with zero probability. If all of the d_distr_samples were taken from
    # within the support of rho_D then each of the M bins would have
    # positive probability. This would in turn imply that the support of
    # rho_Lambda is all of Lambda.
    if comm.rank == 0:
        d_distr_samples = 1.5 * rect_size * \
            (np.random.random((M, dim)) - 0.5) + Q_ref
    else:
        d_distr_samples = np.empty((M, dim))
    # Every rank must use the bins drawn on rank 0.
    comm.Bcast([d_distr_samples, MPI.DOUBLE], root=0)

    # Initialize sample set object
    s_set = samp.voronoi_sample_set(dim)
    s_set.set_values(d_distr_samples)
    s_set.set_kdtree()

    # Compute probabilities in the M bins used to define rho_{D,M} by
    # Monte Carlo approximations that in this context amount to binning
    # with nearest neighbor approximations the num_d_emulate samples taken
    # from rho_D.

    # Generate the samples from rho_D. The total is split across ranks; the
    # first (num_d_emulate % comm.size) ranks take one extra sample.
    num_d_emulate_local = int((num_d_emulate / comm.size) +
                              (comm.rank < num_d_emulate % comm.size))
    d_distr_emulate = rect_size * \
        (np.random.random((num_d_emulate_local, dim)) - 0.5) + Q_ref

    # Bin these samples using nearest neighbor searches
    (_, k) = s_set.query(d_distr_emulate)
    count_neighbors = np.zeros((M,), dtype=int)
    for i in range(M):
        count_neighbors[i] = np.sum(np.equal(k, i))

    # Use the binning to define rho_{D,M}: sum the local counts over all
    # ranks and normalize by the total number of emulated samples.
    # NOTE(review): the count buffer uses the platform default integer
    # while the reduction is declared as MPI.INT - confirm these widths
    # agree on the target platforms.
    ccount_neighbors = np.copy(count_neighbors)
    comm.Allreduce([count_neighbors, MPI.INT], [ccount_neighbors, MPI.INT],
                   op=MPI.SUM)
    count_neighbors = ccount_neighbors
    rho_D_M = count_neighbors.astype(np.float64) / float(num_d_emulate)
    s_set.set_probabilities(rho_D_M)

    # NOTE: The computation of q_distr_prob, q_distr_emulate,
    # q_distr_samples above, while possibly informed by the sampling of the
    # map Q, do not require solving the model EVER! This can be done
    # "offline" so to speak. The results can then be stored and accessed
    # later by the algorithm using a completely different set of parameter
    # samples and model solves.
    if isinstance(data_set, samp.discretization):
        data_set._output_probability_set = s_set
    return s_set
def uniform_partition_normal_distribution(data_set, Q_ref, std, M,
                                          num_d_emulate=1E6):
    r"""
    Creates a simple function approximation of
    :math:`\rho_{\mathcal{D},M}` where :math:`\rho_{\mathcal{D},M}` is a
    multivariate normal probability density centered at ``Q_ref`` with
    standard deviation ``std`` using ``M`` bins sampled from a uniform
    distribution with a size 4 standard deviations in each direction.

    If ``data_set`` is a :class:`~bet.sample.discretization`, the returned
    sample set is also stored as its ``_output_probability_set``.

    :param data_set: Sample set that the probability measure is defined
        for.
    :type data_set: :class:`~bet.sample.discretization` or
        :class:`~bet.sample.sample_set` or :class:`~numpy.ndarray`
    :param int M: Defines number M samples in D used to define
        :math:`\rho_{\mathcal{D},M}` The choice of M is something of an
        "art" - play around with it and you can get reasonable results with
        a relatively small number here like 50.
    :param int num_d_emulate: Number of samples used to emulate using an MC
        assumption
    :param Q_ref: :math:`Q(\lambda_{reference})`; a scalar is treated as a
        one-dimensional reference point
    :type Q_ref: :class:`~numpy.ndarray` of size (mdim,)
    :param std: The standard deviation of each QoI; a scalar is treated as
        a single standard deviation
    :type std: :class:`~numpy.ndarray` of size (mdim,)

    :rtype: :class:`~bet.sample.voronoi_sample_set`
    :returns: sample_set object defining simple function approximation

    """
    # Create M samples defining M bins in D used to define rho_{D,M}.
    # rho_D is assumed to be a multi-variate normal distribution with mean
    # Q_ref and standard deviation std.

    # Scalars become length-1 arrays; lists/tuples become arrays so the
    # arithmetic below (e.g. ``4.0 * std``) works for them as well.
    Q_ref = np.atleast_1d(Q_ref)
    std = np.atleast_1d(std)
    dim = len(Q_ref)

    bin_size = 4.0 * std
    d_distr_samples = np.zeros((M, dim))
    if comm.rank == 0:
        d_distr_samples = bin_size * \
            (np.random.random((M, dim)) - 0.5) + Q_ref
    # Every rank must use the bins drawn on rank 0.
    comm.Bcast([d_distr_samples, MPI.DOUBLE], root=0)

    # Initialize sample set object
    s_set = samp.voronoi_sample_set(dim)
    s_set.set_values(d_distr_samples)
    s_set.set_kdtree()

    # Now compute probabilities for rho_{D,M} by sampling from rho_D.
    # First generate samples of rho_D - I sometimes call this emulation.
    # The total is split across ranks; the first
    # (num_d_emulate % comm.size) ranks take one extra sample.
    num_d_emulate_local = int((num_d_emulate / comm.size) +
                              (comm.rank < num_d_emulate % comm.size))
    d_distr_emulate = np.zeros((num_d_emulate_local, dim))
    # One independent normal draw per QoI (column).
    for i in range(dim):
        d_distr_emulate[:, i] = np.random.normal(Q_ref[i], std[i],
                                                 num_d_emulate_local)

    # Now bin samples of rho_D in the M bins of D to compute rho_{D, M}
    (_, k) = s_set.query(d_distr_emulate)
    count_neighbors = np.zeros((M,), dtype=int)
    for i in range(M):
        count_neighbors[i] = np.sum(np.equal(k, i))

    # Now define probability of the d_distr_samples. This together with
    # d_distr_samples defines rho_{D,M}: sum the local counts over all
    # ranks and normalize by the total number of emulated samples.
    # NOTE(review): the count buffer uses the platform default integer
    # while the reduction is declared as MPI.INT - confirm these widths
    # agree on the target platforms.
    ccount_neighbors = np.copy(count_neighbors)
    comm.Allreduce([count_neighbors, MPI.INT], [ccount_neighbors, MPI.INT],
                   op=MPI.SUM)
    count_neighbors = ccount_neighbors
    rho_D_M = count_neighbors.astype(np.float64) / float(num_d_emulate)
    s_set.set_probabilities(rho_D_M)

    # NOTE: The computation of q_distr_prob, q_distr_emulate, q_distr_samples
    # above, while informed by the sampling of the map Q, do not require
    # solving the model EVER! This can be done "offline" so to speak.
    if isinstance(data_set, samp.discretization):
        data_set._output_probability_set = s_set
    return s_set
def unif_normal(Q_ref, M, std, num_d_emulate=1E6):
    r"""
    Creates a simple function approximation of
    :math:`\rho_{\mathcal{D},M}` where :math:`\rho_{\mathcal{D},M}` is a
    multivariate normal probability density centered at Q_ref with
    standard deviation std using M bins sampled from a uniform distribution
    with a size 4 standard deviations in each direction.

    :param int M: Defines number M samples in D used to define
        :math:`\rho_{\mathcal{D},M}` The choice of M is something of an
        "art" - play around with it and you can get reasonable results with
        a relatively small number here like 50.
    :param int num_d_emulate: Number of samples used to emulate using an MC
        assumption. Each rank draws ``int(num_d_emulate/comm.size) + 1``
        samples, so slightly more than ``num_d_emulate`` are used in total.
    :param Q_ref: :math:`Q(\lambda_{reference})`; a scalar is treated as a
        one-dimensional reference point
    :type Q_ref: :class:`~numpy.ndarray` of size (mdim,)
    :param std: The standard deviation of each QoI; a scalar is treated as
        a single standard deviation
    :type std: :class:`~numpy.ndarray` of size (mdim,)

    :rtype: tuple
    :returns: (rho_D_M, d_distr_samples, d_Tree) where ``rho_D_M`` is (M,)
        and ``d_distr_samples`` are (M, mdim) :class:`~numpy.ndarray` and
        `d_Tree` is the :class:`~scipy.spatial.KDTree` for d_distr_samples

    """
    # Create M samples defining M bins in D used to define rho_{D,M}.
    # rho_D is assumed to be a multi-variate normal distribution with mean
    # Q_ref and standard deviation std.

    # Scalars become length-1 arrays; lists/tuples become arrays so the
    # arithmetic below (e.g. ``4.0 * std``) works for them as well.
    Q_ref = np.atleast_1d(Q_ref)
    std = np.atleast_1d(std)
    dim = len(Q_ref)

    bin_size = 4.0 * std
    d_distr_samples = np.zeros((M, dim))
    if comm.rank == 0:
        d_distr_samples = bin_size * \
            (np.random.random((M, dim)) - 0.5) + Q_ref
    # Every rank must use the bins drawn on rank 0.
    comm.Bcast([d_distr_samples, MPI.DOUBLE], root=0)

    # Now compute probabilities for rho_{D,M} by sampling from rho_D.
    # First generate samples of rho_D - I sometimes call this emulation.
    num_d_emulate_local = int(num_d_emulate / comm.size) + 1
    d_distr_emulate = np.zeros((num_d_emulate_local, dim))
    # One independent normal draw per QoI (column).
    for i in range(dim):
        d_distr_emulate[:, i] = np.random.normal(Q_ref[i], std[i],
                                                 num_d_emulate_local)

    # Now bin samples of rho_D in the M bins of D to compute rho_{D, M}.
    # ``k`` holds the index of the nearest bin for every emulated sample,
    # so one histogram pass yields all M counts. The slice drops any index
    # >= M, which a per-bin equality count would also ignore.
    d_Tree = spatial.KDTree(d_distr_samples)
    (_, k) = d_Tree.query(d_distr_emulate)
    count_neighbors = np.bincount(np.ravel(k), minlength=M)[:M].astype(int)

    # Now define probability of the d_distr_samples. This together with
    # d_distr_samples defines rho_{D,M}: sum the local counts over all
    # ranks and normalize by the number of samples actually drawn.
    # NOTE(review): the count buffer uses the platform default integer
    # while the reduction is declared as MPI.INT - confirm these widths
    # agree on the target platforms.
    ccount_neighbors = np.copy(count_neighbors)
    comm.Allreduce([count_neighbors, MPI.INT], [ccount_neighbors, MPI.INT],
                   op=MPI.SUM)
    count_neighbors = ccount_neighbors
    rho_D_M = count_neighbors.astype(np.float64) / \
        float(comm.size * num_d_emulate_local)

    # NOTE: The computation of q_distr_prob, q_distr_emulate, q_distr_samples
    # above, while informed by the sampling of the map Q, do not require
    # solving the model EVER! This can be done "offline" so to speak.
    return (rho_D_M, d_distr_samples, d_Tree)
def normal_partition_normal_distribution(data_set, Q_ref=None, std=1, M=1,
                                         num_d_emulate=1E6):
    r"""
    Creates a simple function approximation of
    :math:`\rho_{\mathcal{D},M}` where :math:`\rho_{\mathcal{D},M}` is a
    multivariate normal probability density centered at ``Q_ref`` with
    standard deviation ``std`` using ``M`` bins sampled from the given
    normal distribution.

    If ``data_set`` is a :class:`~bet.sample.discretization`, the returned
    sample set is also stored as its ``_output_probability_set`` and
    ``data_set.set_io_ptr(globalize=False)`` is called.

    :param data_set: Sample set that the probability measure is defined
        for.
    :type data_set: :class:`~bet.sample.discretization` or
        :class:`~bet.sample.sample_set` or :class:`~numpy.ndarray`
    :param int M: Defines number M samples in D used to define
        :math:`\rho_{\mathcal{D},M}` The choice of M is something of an
        "art" - play around with it and you can get reasonable results with
        a relatively small number here like 50.
    :param int num_d_emulate: Number of samples used to emulate using an MC
        assumption
    :param Q_ref: :math:`Q(\lambda_{reference})`. If ``None`` it is
        obtained from ``infer_Q(data_set)``.
    :type Q_ref: :class:`~numpy.ndarray` of size (mdim,)
    :param std: The standard deviation of each QoI
    :type std: :class:`~numpy.ndarray` of size (mdim,)

    :rtype: :class:`~bet.sample.voronoi_sample_set`
    :returns: sample_set object defining simple function approximation

    """
    if Q_ref is None:
        Q_ref = infer_Q(data_set)
    import scipy.stats as stats

    # Create M samples defining M bins in D used to define rho_{D,M}.
    # rho_D is assumed to be a multi-variate normal distribution with mean
    # Q_ref and standard deviation std.
    # NOTE(review): check_type presumably coerces its first argument to an
    # array matching the dimension of data_set - confirm against its
    # definition.
    Q_ref = check_type(Q_ref, data_set)
    std = check_type(std, data_set)
    covariance = std ** 2
    dim = len(Q_ref)

    d_distr_samples = np.zeros((M, dim))
    logging.info("d_distr_samples.shape " + str(d_distr_samples.shape))
    logging.info("Q_ref.shape " + str(Q_ref.shape))
    logging.info("std.shape " + str(std.shape))

    if comm.rank == 0:
        # One independent normal draw per QoI (column).
        for i in range(dim):
            d_distr_samples[:, i] = np.random.normal(Q_ref[i], std[i], M)
    # Every rank must use the bins drawn on rank 0.
    comm.Bcast([d_distr_samples, MPI.DOUBLE], root=0)

    # Initialize sample set object
    s_set = samp.voronoi_sample_set(dim)
    s_set.set_values(d_distr_samples)
    s_set.set_kdtree()

    # Now compute probabilities for rho_{D,M} by sampling from rho_D.
    # First generate samples of rho_D - I sometimes call this emulation.
    # The total is split across ranks; the first
    # (num_d_emulate % comm.size) ranks take one extra sample.
    num_d_emulate_local = int((num_d_emulate / comm.size) +
                              (comm.rank < num_d_emulate % comm.size))
    d_distr_emulate = np.zeros((num_d_emulate_local, dim))
    for i in range(dim):
        d_distr_emulate[:, i] = np.random.normal(Q_ref[i], std[i],
                                                 num_d_emulate_local)

    # Now bin samples of rho_D in the M bins of D to compute rho_{D, M}.
    # For every bin, record how many emulated samples fall into it and the
    # sum of reciprocal densities of those samples (stored as "volumes").
    (_, k) = s_set.query(d_distr_emulate)
    count_neighbors = np.zeros((M,), dtype=int)
    volumes = np.zeros((M,))
    for i in range(M):
        in_bin = np.equal(k, i)
        count_neighbors[i] = np.sum(in_bin)
        volumes[i] = np.sum(1.0 / stats.multivariate_normal.pdf(
            d_distr_emulate[in_bin, :], Q_ref, covariance))

    # Now define probability of the d_distr_samples.
    # This together with d_distr_samples defines rho_{D,M}.
    # NOTE(review): the count buffer uses the platform default integer
    # while the reduction is declared as MPI.INT - confirm these widths
    # agree on the target platforms.
    ccount_neighbors = np.copy(count_neighbors)
    comm.Allreduce([count_neighbors, MPI.INT], [ccount_neighbors, MPI.INT],
                   op=MPI.SUM)
    count_neighbors = ccount_neighbors
    cvolumes = np.copy(volumes)
    comm.Allreduce([volumes, MPI.DOUBLE], [cvolumes, MPI.DOUBLE], op=MPI.SUM)
    volumes = cvolumes

    # Weight the counts by the accumulated volumes and normalize so the
    # probabilities sum to one.
    rho_D_M = count_neighbors.astype(np.float64) * volumes
    rho_D_M = rho_D_M / np.sum(rho_D_M)
    s_set.set_probabilities(rho_D_M)
    s_set.set_volumes(volumes)

    # NOTE: The computation of q_distr_prob, q_distr_emulate, q_distr_samples
    # above, while informed by the sampling of the map Q, do not require
    # solving the model EVER! This can be done "offline" so to speak.
    if isinstance(data_set, samp.discretization):
        data_set._output_probability_set = s_set
        data_set.set_io_ptr(globalize=False)
    return s_set
def unif_unif(data, Q_ref, M=50, bin_ratio=0.2, num_d_emulate=1E6):
    r"""
    Creates a simple function approximation of :math:`\rho_{\mathcal{D}}`
    where :math:`\rho_{\mathcal{D}}` is a uniform probability density on a
    generalized rectangle centered at Q_ref.

    The support of this density is defined by bin_ratio, which determines
    the size of the generalized rectangle by scaling the circumscribing
    generalized rectangle of :math:`\mathcal{D}`.

    The simple function approximation is then defined by determining M
    Voronoi cells (i.e., "bins") partitioning :math:`\mathcal{D}`. These
    bins are only implicitly defined by M samples in :math:`\mathcal{D}`.
    Finally, the probabilities of each of these bins is computed by
    sampling from :math:`\rho_{\mathcal{D}}` and using nearest neighbor
    searches to bin these samples in the M implicitly defined bins.
    The result is the simple function approximation denoted by
    :math:`\rho_{\mathcal{D},M}`.

    Note that all computations in the measure-theoretic framework that
    follow from this are for the fixed simple function approximation
    :math:`\rho_{\mathcal{D},M}`.

    :param int M: Defines number M samples in D used to define
        :math:`\rho_{\mathcal{D},M}` The choice of M is something of an
        "art" - play around with it and you can get reasonable results with
        a relatively small number here like 50.
    :param bin_ratio: The ratio used to determine the width of the
        uniform distribution as ``bin_size = (data_max-data_min)*bin_ratio``
    :type bin_ratio: double or list()
    :param int num_d_emulate: Number of samples used to emulate using an MC
        assumption. Each rank draws ``int(num_d_emulate/comm.size) + 1``
        samples, so slightly more than ``num_d_emulate`` are used in total.
    :param data: Array containing QoI data where the QoI is mdim
        dimensional
    :type data: :class:`~numpy.ndarray` of size (num_samples, mdim)
    :param Q_ref: :math:`Q(\lambda_{reference})`
    :type Q_ref: :class:`~numpy.ndarray` of size (mdim,)

    :rtype: tuple
    :returns: (rho_D_M, d_distr_samples, d_Tree) where ``rho_D_M`` is (M,)
        and ``d_distr_samples`` are (M, mdim) :class:`~numpy.ndarray` and
        `d_Tree` is the :class:`~scipy.spatial.KDTree` for d_distr_samples

    """
    data = util.fix_dimensions_data(data)
    dim = data.shape[1]
    # Width of the support: a fraction of the per-QoI range of the data.
    bin_size = (np.max(data, 0) - np.min(data, 0)) * bin_ratio

    # Create M samples defining M Voronoi cells (i.e., "bins") in D used to
    # define the simple function approximation rho_{D,M}.
    #
    # This does not have to be random, but here we assume this to be the
    # case. We can choose these samples deterministically but that fails to
    # scale with dimension efficiently.
    #
    # Note that these M samples are chosen for the sole purpose of
    # determining the bins used to create the approximation to rho_D. We
    # call these M samples "d_distr_samples" because they are samples on
    # the data space and the distr implies these samples are chosen to
    # create the approximation to the probability measure (distribution)
    # on D.
    #
    # Note that we create these samples in a set containing the
    # hyperrectangle (hence the factor 1.5) in order to get output cells
    # with zero probability. If all of the d_distr_samples were taken from
    # within the support of rho_D then each of the M bins would have
    # positive probability. This would in turn imply that the support of
    # rho_Lambda is all of Lambda.
    if comm.rank == 0:
        d_distr_samples = 1.5 * bin_size * \
            (np.random.random((M, dim)) - 0.5) + Q_ref
    else:
        d_distr_samples = np.empty((M, dim))
    # Every rank must use the bins drawn on rank 0.
    comm.Bcast([d_distr_samples, MPI.DOUBLE], root=0)

    # Compute probabilities in the M bins used to define rho_{D,M} by
    # Monte Carlo approximations that in this context amount to binning
    # with nearest neighbor approximations the num_d_emulate samples taken
    # from rho_D.

    # Generate the samples from rho_D
    num_d_emulate_local = int(num_d_emulate / comm.size) + 1
    d_distr_emulate = bin_size * \
        (np.random.random((num_d_emulate_local, dim)) - 0.5) + Q_ref

    # Bin these samples using nearest neighbor searches. ``k`` holds the
    # index of the nearest bin for every emulated sample, so one histogram
    # pass yields all M counts. The slice drops any index >= M, which a
    # per-bin equality count would also ignore.
    d_Tree = spatial.KDTree(d_distr_samples)
    (_, k) = d_Tree.query(d_distr_emulate)
    count_neighbors = np.bincount(np.ravel(k), minlength=M)[:M].astype(int)

    # Use the binning to define rho_{D,M}: sum the local counts over all
    # ranks and normalize by the number of samples actually drawn.
    # NOTE(review): the count buffer uses the platform default integer
    # while the reduction is declared as MPI.INT - confirm these widths
    # agree on the target platforms.
    ccount_neighbors = np.copy(count_neighbors)
    comm.Allreduce([count_neighbors, MPI.INT], [ccount_neighbors, MPI.INT],
                   op=MPI.SUM)
    count_neighbors = ccount_neighbors
    rho_D_M = count_neighbors.astype(np.float64) / \
        float(num_d_emulate_local * comm.size)

    # NOTE: The computation of q_distr_prob, q_distr_emulate,
    # q_distr_samples above, while possibly informed by the sampling of the
    # map Q, do not require solving the model EVER! This can be done
    # "offline" so to speak. The results can then be stored and accessed
    # later by the algorithm using a completely different set of parameter
    # samples and model solves.
    return (rho_D_M, d_distr_samples, d_Tree)