Code example #1
File: plotP.py Project: yangleicq/BET
def calculate_1D_marginal_probs(sample_set, nbins=20):
    r"""
    This calculates every single marginal of the probability measure
    described by the probabilities within the sample_set object.
    If the sample_set object is a discretization object, we assume
    that the probabilities to be plotted are from the input space on the
    emulated samples
    (``discretization._emulated_input_sample_set._probabilities_local``).

    This assumes that the user has already run
    :meth:`~bet.calculateP.calculateP.prob_emulated`.

    :param sample_set: Object containing samples and probabilities
    :type sample_set: :class:`~bet.sample.sample_set_base` or 
        :class:`~bet.sample.discretization`
    :param nbins: Number of bins in each direction.
    :type nbins: int or :class:`~numpy.ndarray` of shape (ndim,)
    :rtype: tuple
    :returns: (bins, marginals)

    """
    if isinstance(sample_set, sample.discretization):
        sample_obj = sample_set._emulated_input_sample_set
        if sample_obj is None:
            raise missing_attribute("Missing emulated_input_sample_set")
    elif isinstance(sample_set, sample.sample_set_base):
        sample_obj = sample_set
    else:
        raise bad_object("Improper sample object")

    # Check for local probabilities
    if sample_obj._probabilities_local is None:
        if sample_obj._probabilities is None:
            raise missing_attribute("Missing probabilities")
        else:
            sample_obj.global_to_local()

    # Make list of bins if only an integer is given
    if isinstance(nbins, int):
        nbins = nbins * np.ones(sample_obj.get_dim(), dtype=int)

    # Create bins
    bins = []
    for i in range(sample_obj.get_dim()):
        bins.append(
            np.linspace(sample_obj.get_domain()[i][0],
                        sample_obj.get_domain()[i][1], nbins[i] + 1))

    # Calculate marginals
    marginals = {}
    for i in range(sample_obj.get_dim()):
        [marg, _] = np.histogram(sample_obj.get_values_local()[:, i],
                                 bins=bins[i],
                                 weights=sample_obj.get_probabilities_local())
        marg_temp = np.copy(marg)
        comm.Allreduce([marg, MPI.DOUBLE], [marg_temp, MPI.DOUBLE], op=MPI.SUM)
        marginals[i] = marg_temp

    return (bins, marginals)
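
The MPI reduction above only matters when samples are split across ranks. As
a serial illustration of what the routine computes, here is a minimal sketch
using plain NumPy arrays instead of a sample_set object (all values below are
hypothetical stand-ins):

import numpy as np

# Stand-ins for the sample_set data: values in the unit square and
# probabilities that sum to one.
values = np.random.random((1000, 2))           # (num_samples, ndim)
probs = np.full(1000, 1.0 / 1000)              # (num_samples,)
domain = np.array([[0.0, 1.0], [0.0, 1.0]])    # (ndim, 2)
nbins = 20

bins = [np.linspace(lo, hi, nbins + 1) for (lo, hi) in domain]
marginals = {}
for i in range(values.shape[1]):
    # Weighted histogram: each sample deposits its probability in its bin.
    marginals[i], _ = np.histogram(values[:, i], bins=bins[i], weights=probs)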
Code example #2
def calculate_2D_marginal_probs(P_samples, samples, lam_domain, nbins=20):
    """
    This calculates every pair of marginals (or the joint in the 2D case) of
    the input probability measure defined by P_samples on a rectangular grid.

    :param P_samples: Probabilities.
    :type P_samples: :class:`~numpy.ndarray` of shape (num_samples,)
    :param samples: The samples in parameter space for which the model was run.
    :type samples: :class:`~numpy.ndarray` of shape (num_samples, ndim)
    :param lam_domain: The domain for each parameter for the model.
    :type lam_domain: :class:`~numpy.ndarray` of shape (ndim, 2)
    :param nbins: Number of bins in each direction.
    :type nbins: int or :class:`~numpy.ndarray` of shape (ndim,)
    :rtype: tuple
    :returns: (bins, marginals)

    """
    if len(samples.shape) == 1:
        samples = np.expand_dims(samples, axis=1)
    num_samples = samples.shape[0]
    num_dim = samples.shape[1]

    # Make list of bins if only an integer is given
    if isinstance(nbins, int):
        nbins = nbins * np.ones(num_dim, dtype=int)

    # Create bins
    bins = []
    for i in range(num_dim):
        bins.append(
            np.linspace(lam_domain[i][0], lam_domain[i][1], nbins[i] + 1))

    # Calculate marginals
    marginals = {}
    for i in range(num_dim):
        for j in range(i + 1, num_dim):
            (marg, _) = np.histogramdd(samples[:, [i, j]],
                                       bins=[bins[i], bins[j]],
                                       weights=P_samples)
            marg = np.ascontiguousarray(marg)
            marg_temp = np.copy(marg)
            comm.Allreduce([marg, MPI.DOUBLE], [marg_temp, MPI.DOUBLE],
                           op=MPI.SUM)
            marginals[(i, j)] = marg_temp

    return (bins, marginals)
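
Both marginal routines rely on the same parallel idiom: each rank histograms
only its local slice of the samples, and an Allreduce sums the partial
histograms so every rank ends up with the global marginal. A minimal
standalone mpi4py sketch of that pattern (it does not use BET's comm wrapper,
and the data here is hypothetical):

import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD

# Each rank holds its own slice of weighted samples; the weights are scaled
# so that they sum to one across all ranks.
local_samples = np.random.random(1000)
local_weights = np.full(1000, 1.0 / (1000 * comm.size))

edges = np.linspace(0.0, 1.0, 21)
local_marg, _ = np.histogram(local_samples, bins=edges, weights=local_weights)

# Sum the per-rank histograms; afterwards every rank holds the global result.
global_marg = np.empty_like(local_marg)
comm.Allreduce([local_marg, MPI.DOUBLE], [global_marg, MPI.DOUBLE],
               op=MPI.SUM)

Run under, e.g., mpiexec -n 4, every rank ends up holding the identical
global_marg array.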
Code example #3
def uniform_partition_uniform_distribution_rectangle_size(data_set, 
                                                          Q_ref=None,
                                                          rect_size=None, 
                                                          M=50,
                                                          num_d_emulate=1E6):
    r"""
    Creates a simple function approximation of :math:`\rho_{\mathcal{D}}`
    where :math:`\rho_{\mathcal{D}}` is a uniform probability density on
    a generalized rectangle centered at ``Q_ref`` or the ``reference_value``
    of a sample set. If ``Q_ref`` is not given, the reference value is used.
    The support of this density is defined by ``rect_size``, which determines
    the size of the generalized rectangle.
    The simple function approximation is then defined by determining ``M``
    Voronoi cells (i.e., "bins") partitioning :math:`\mathcal{D}`. These
    bins are only implicitly defined by ``M`` samples in :math:`\mathcal{D}`.
    Finally, the probability of each of these bins is computed by
    sampling from :math:`\rho_{\mathcal{D}}` and using nearest neighbor
    searches to bin these samples in the ``M`` implicitly defined bins.
    The result is the simple function approximation denoted by
    :math:`\rho_{\mathcal{D},M}`.

    .. note::

        ``data_set`` is only used to determine dimension.

    Note that all computations in the measure-theoretic framework that
    follow from this are for the fixed simple function approximation
    :math:`\rho_{\mathcal{D},M}`.

    :param int M: Defines the number M of samples in :math:`\mathcal{D}` used
        to define :math:`\rho_{\mathcal{D},M}`. The choice of M is something
        of an "art"; reasonable results can often be obtained with a
        relatively small number such as 50.
    :param rect_size: Determines the size of the support of the
        uniform distribution on a generalized rectangle
    :type rect_size: double or list
    :param int num_d_emulate: Number of samples used to emulate
        :math:`\rho_{\mathcal{D}}` under a Monte Carlo assumption
    :param data_set: Sample set that the probability measure is defined for.
    :type data_set: :class:`~bet.sample.discretization` 
        or :class:`~bet.sample.sample_set` or :class:`~numpy.ndarray`
    :param Q_ref: :math:`Q(\lambda_{reference})`
    :type Q_ref: :class:`~numpy.ndarray` of size (mdim,)

    :rtype: :class:`~bet.sample.voronoi_sample_set`
    :returns: sample_set object defining simple function approximation
    """

    (num, dim, values, Q_ref) = check_inputs(data_set, Q_ref)

    if rect_size is None:
        raise wrong_argument_type("Rectangle size required.")
    elif not isinstance(rect_size, collections.abc.Iterable):
        rect_size = rect_size * np.ones((dim,))
    if np.any(np.less_equal(rect_size, 0)):
        msg = 'rect_size must be greater than 0'
        raise wrong_argument_type(msg)

    r'''
    Create M samples defining M Voronoi cells (i.e., "bins") in D used to
    define the simple function approximation :math:`\rho_{\mathcal{D},M}`.

    This does not have to be random, but here we assume this to be the case.
    We can choose these samples deterministically but that fails to scale with
    dimension efficiently.

    Note that these M samples are chosen for the sole purpose of determining
    the bins used to create the approximation to :math:`\rho_{\mathcal{D}}`.

    We call these M samples "d_distr_samples" because they are samples on the
    data space and the distr implies these samples are chosen to create the
    approximation to the probability measure (distribution) on D.

    Note that we create these samples in a set containing the hyperrectangle in
    order to get output cells with zero probability. If all of the
    d_distr_samples were taken from within the support of
    :math:`\rho_{\mathcal{D}}` then each of the M bins would have positive
    probability. This would in turn imply that the support of
    :math:`\rho_{\Lambda}` is all of :math:`\Lambda`.
    '''

    if comm.rank == 0:
        d_distr_samples = 1.5 * rect_size * (np.random.random((M,
                                            dim)) - 0.5) + Q_ref
    else:
        d_distr_samples = np.empty((M, dim))
    comm.Bcast([d_distr_samples, MPI.DOUBLE], root=0)

    # Initialize sample set object
    s_set = samp.voronoi_sample_set(dim)
    s_set.set_values(d_distr_samples)
    s_set.set_kdtree()

    r'''
    Compute probabilities in the M bins used to define
    :math:`\rho_{\mathcal{D},M}` by Monte Carlo approximations
    that in this context amount to binning with nearest neighbor
    approximations the num_d_emulate samples taken from
    :math:`\rho_{\mathcal{D}}`.
    '''
    # Generate the samples from :math:`\rho_{\mathcal{D}}`
    num_d_emulate_local = int((num_d_emulate / comm.size) +
                              (comm.rank < num_d_emulate % comm.size))
    d_distr_emulate = rect_size * (np.random.random((num_d_emulate_local,
                                                     dim)) - 0.5) + Q_ref

    # Bin these samples using nearest neighbor searches
    (_, k) = s_set.query(d_distr_emulate)

    count_neighbors = np.zeros((M,), dtype=int)
    for i in range(M):
        count_neighbors[i] = np.sum(np.equal(k, i))

    # Use the binning to define :math:`\rho_{\mathcal{D},M}`
    ccount_neighbors = np.copy(count_neighbors)
    comm.Allreduce([count_neighbors, MPI.INT], [ccount_neighbors, MPI.INT],
                   op=MPI.SUM)
    count_neighbors = ccount_neighbors
    rho_D_M = count_neighbors.astype(np.float64) / float(num_d_emulate)
    s_set.set_probabilities(rho_D_M)

    '''
    NOTE: The computation of rho_D_M, d_distr_emulate, and d_distr_samples
    above, while possibly informed by the sampling of the map Q, does not
    require solving the model at all, so it can be done "offline". The
    results can then be stored and accessed later by the algorithm using a
    completely different set of parameter samples and model solves.
    '''
    if isinstance(data_set, samp.discretization):
        data_set._output_probability_set = s_set
    return s_set
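
Stripped of the MPI machinery and the sample-set objects, the core of this
construction is nearest-neighbor binning against a KD-tree. A serial sketch
under the same assumptions (a uniform density of width rect_size centered at
Q_ref; all values below are hypothetical):

import numpy as np
from scipy import spatial

M, dim, num_d_emulate = 50, 2, 100000
Q_ref = np.array([0.5, 0.5])
rect_size = np.array([0.2, 0.2])

# M bin centers on a 1.5x larger box, so some cells can carry zero mass.
d_distr_samples = 1.5 * rect_size * (np.random.random((M, dim)) - 0.5) + Q_ref

# Emulated draws from the uniform density supported on the rect_size box.
d_distr_emulate = rect_size * (np.random.random((num_d_emulate, dim)) - 0.5) \
    + Q_ref

# Nearest-neighbor binning; normalized counts define rho_{D,M}.
(_, k) = spatial.cKDTree(d_distr_samples).query(d_distr_emulate)
rho_D_M = np.bincount(k, minlength=M).astype(np.float64) / num_d_emulate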
Code example #4
def user_partition_user_distribution(data_set, data_partition_set,
                                     data_distribution_set):
    r"""
    Creates a user defined simple function approximation of a user
    defined distribution. The simple function discretization is
    specified in the ``data_partition_set``, and the set of i.i.d.
    samples from the distribution is specified in the
    ``data_distribution_set``.

    :param data_set: Sample set that the probability measure is defined for.
    :type data_set: :class:`~bet.sample.discretization` or
        :class:`~bet.sample.sample_set` or :class:`~numpy.ndarray`
    :param data_partition_set: Sample set defining the discretization
        of the data space into Voronoi cells on which a simple function
        is defined.
    :type data_partition_set: :class:`~bet.sample.discretization` or
        :class:`~bet.sample.sample_set` or :class:`~numpy.ndarray`
    :param data_distribution_set: Sample set containing the i.i.d. samples
        from the distribution on the data space that are binned within the
        Voronoi cells implicitly defined by the ``data_partition_set``.
    :type data_distribution_set: :class:`~bet.sample.discretization` or
        :class:`~bet.sample.sample_set` or :class:`~numpy.ndarray`

    :rtype: :class:`~bet.sample.voronoi_sample_set`
    :returns: sample_set object defining simple function approximation
    """

    if isinstance(data_set, samp.sample_set_base):
        s_set = data_set.copy()
        dim = s_set._dim
    elif isinstance(data_set, samp.discretization):
        s_set = data_set._output_sample_set.copy()
        dim = s_set._dim
    elif isinstance(data_set, np.ndarray):
        dim = data_set.shape[1]
        values = data_set
        s_set = samp.sample_set(dim=dim)
        s_set.set_values(values)
    else:
        msg = "The first argument must be of type bet.sample.sample_set, "
        msg += "bet.sample.discretization or np.ndarray"
        raise wrong_argument_type(msg)

    if isinstance(data_partition_set, samp.sample_set_base):
        M = data_partition_set.check_num()
        d_distr_samples = data_partition_set._values
        dim_simpleFun = d_distr_samples.shape[1]
    elif isinstance(data_partition_set, samp.discretization):
        M = data_partition_set.check_nums()
        d_distr_samples = data_partition_set._output_sample_set._values
        dim_simpleFun = d_distr_samples.shape[1]
    elif isinstance(data_partition_set, np.ndarray):
        M = data_partition_set.shape[0]
        dim_simpleFun = data_partition_set.shape[1]
        d_distr_samples = data_partition_set
    else:
        msg = "The second argument must be of type bet.sample.sample_set, "
        msg += "bet.sample.discretization or np.ndarray"
        raise wrong_argument_type(msg)

    if isinstance(data_distribution_set, samp.sample_set_base):
        d_distr_emulate = data_distribution_set._values
        dim_MonteCarlo = d_distr_emulate.shape[1]
        num_d_emulate = data_distribution_set.check_num()
    elif isinstance(data_distribution_set, samp.discretization):
        d_distr_emulate = data_distribution_set._output_sample_set._values
        dim_MonteCarlo = d_distr_emulate.shape[1]
        num_d_emulate = data_distribution_set.check_nums()
    elif isinstance(data_distribution_set, np.ndarray):
        num_d_emulate = data_distribution_set.shape[0]
        dim_MonteCarlo = data_distribution_set.shape[1]
        d_distr_emulate = data_distribution_set
    else:
        msg = "The second argument must be of type bet.sample.sample_set, "
        msg += "bet.sample.discretization or np.ndarray"
        raise wrong_argument_type(msg)

    if np.not_equal(dim_MonteCarlo, dim) or np.not_equal(dim_simpleFun, dim):
        msg = "The argument types have conflicting dimensions"
        raise wrong_argument_type(msg)

    # Initialize sample set object
    s_set = samp.sample_set(dim)
    s_set.set_values(d_distr_samples)
    s_set.set_kdtree()

    (_, k) = s_set.query(d_distr_emulate)

    count_neighbors = np.zeros((M,), dtype=int)
    for i in range(M):
        count_neighbors[i] = np.sum(np.equal(k, i))

    # Use the binning to define :math:`\rho_{\mathcal{D},M}`
    ccount_neighbors = np.copy(count_neighbors)
    comm.Allreduce([count_neighbors, MPI.INT], [ccount_neighbors, MPI.INT],
                   op=MPI.SUM)
    count_neighbors = ccount_neighbors
    rho_D_M = count_neighbors.astype(np.float64) / \
              float(num_d_emulate * comm.size)
    s_set.set_probabilities(rho_D_M)

    if isinstance(data_set, samp.discretization):
        data_set._output_probability_set = s_set
    return s_set
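
Because every argument may be a plain np.ndarray, a usage sketch looks like
the following (a hypothetical example, assuming this module and its imports
are available on a single MPI rank, and that get_probabilities is the
accessor pairing the set_probabilities call above):

import numpy as np

# Hypothetical arrays: model outputs, a user-chosen partition of the data
# space, and i.i.d. draws from the user's distribution on the data space.
data = np.random.random((1000, 2))       # output samples, (num, dim)
partition = np.random.random((50, 2))    # M = 50 Voronoi generators
dist_samples = np.random.normal(0.5, 0.1, (100000, 2))

s_set = user_partition_user_distribution(data, partition, dist_samples)
print(s_set.get_probabilities().sum())   # should be approximately 1.0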
Code example #5
def uniform_partition_normal_distribution(data_set, Q_ref, std, M,
                                          num_d_emulate=1E6):
    r"""
    Creates a simple function approximation of :math:`\rho_{\mathcal{D},M}`
    where :math:`\rho_{\mathcal{D},M}` is a multivariate normal probability
    density centered at ``Q_ref`` with standard deviation ``std`` using
    ``M`` bins sampled from a uniform distribution with a size of 4 standard
    deviations in each direction.

    :param data_set: Sample set that the probability measure is defined for.
    :type data_set: :class:`~bet.sample.discretization` or
        :class:`~bet.sample.sample_set` or :class:`~numpy.ndarray`
    :param int M: Defines the number M of samples in :math:`\mathcal{D}` used
        to define :math:`\rho_{\mathcal{D},M}`. The choice of M is something
        of an "art"; reasonable results can often be obtained with a
        relatively small number such as 50.
    :param int num_d_emulate: Number of samples used to emulate
        :math:`\rho_{\mathcal{D}}` under a Monte Carlo assumption
    :param Q_ref: :math:`Q(\lambda_{reference})`
    :type Q_ref: :class:`~numpy.ndarray` of size (mdim,)
    :param std: The standard deviation of each QoI
    :type std: :class:`~numpy.ndarray` of size (mdim,)

    :rtype: :class:`~bet.sample.voronoi_sample_set`
    :returns: sample_set object defining simple function approximation

    """
    r'''Create M samples defining M bins in D used to define
    :math:`\rho_{\mathcal{D},M}`. rho_D is assumed to be a multivariate
    normal distribution with mean Q_ref and standard deviation std.'''
    if not isinstance(Q_ref, collections.abc.Iterable):
        Q_ref = np.array([Q_ref])
    if not isinstance(std, collections.abc.Iterable):
        std = np.array([std])

    bin_size = 4.0 * std
    d_distr_samples = np.zeros((M, len(Q_ref)))
    if comm.rank == 0:
        d_distr_samples = bin_size * (np.random.random((M,
                                            len(Q_ref))) - 0.5) + Q_ref
    comm.Bcast([d_distr_samples, MPI.DOUBLE], root=0)

    # Initialize sample set object
    s_set = samp.voronoi_sample_set(len(Q_ref))
    s_set.set_values(d_distr_samples)
    s_set.set_kdtree()

    r'''Now compute probabilities for :math:`\rho_{\mathcal{D},M}` by sampling
    from rho_D. First generate samples of rho_D; this is sometimes called
    emulation.'''
    num_d_emulate_local = int((num_d_emulate / comm.size) +
                              (comm.rank < num_d_emulate % comm.size))
    d_distr_emulate = np.zeros((num_d_emulate_local, len(Q_ref)))
    for i in range(len(Q_ref)):
        d_distr_emulate[:, i] = np.random.normal(Q_ref[i], std[i],
                                                 num_d_emulate_local)

    # Now bin samples of rho_D in the M bins of D to compute rho_{D, M}
    if len(d_distr_samples.shape) == 1:
        d_distr_samples = np.expand_dims(d_distr_samples, axis=1)

    (_, k) = s_set.query(d_distr_emulate)
    count_neighbors = np.zeros((M,), dtype=int)
    # volumes = np.zeros((M,))
    for i in range(M):
        Itemp = np.equal(k, i)
        count_neighbors[i] = np.sum(Itemp)

    r'''Now define the probability of the d_distr_samples. This together with
    d_distr_samples defines :math:`\rho_{\mathcal{D},M}`.'''
    ccount_neighbors = np.copy(count_neighbors)
    comm.Allreduce([count_neighbors, MPI.INT], [ccount_neighbors, MPI.INT],
                   op=MPI.SUM)
    count_neighbors = ccount_neighbors
    rho_D_M = count_neighbors.astype(np.float64) / float(num_d_emulate)
    s_set.set_probabilities(rho_D_M)
    # NOTE: The computation of rho_D_M, d_distr_emulate, and d_distr_samples
    # above, while informed by the sampling of the map Q, does not require
    # solving the model at all, so it can be done "offline".
    if isinstance(data_set, samp.discretization):
        data_set._output_probability_set = s_set
    return s_set
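
As an aside, the per-dimension loop that fills d_distr_emulate can be
collapsed into one call, since np.random.normal broadcasts array-valued loc
and scale over the trailing axis. A small equivalent sketch with hypothetical
values:

import numpy as np

Q_ref = np.array([0.3, 0.7])
std = np.array([0.05, 0.1])
num_local = 100000

# Each column is drawn from its own normal N(Q_ref[i], std[i]**2), matching
# the column-by-column loop above.
d_distr_emulate = np.random.normal(Q_ref, std, size=(num_local, len(Q_ref)))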
Code example #6
File: simpleFunP.py Project: outsidercherry/BET
def normal_partition_normal_distribution(data_set, Q_ref=None, std=1, M=1,
                                         num_d_emulate=1E6):
    r"""
    Creates a simple function approximation of :math:`\rho_{\mathcal{D},M}`
    where :math:`\rho_{\mathcal{D},M}` is a multivariate normal probability
    density centered at ``Q_ref`` with standard deviation ``std`` using
    ``M`` bins sampled from the given normal distribution.

    :param data_set: Sample set that the probability measure is defined for.
    :type data_set: :class:`~bet.sample.discretization` or
        :class:`~bet.sample.sample_set` or :class:`~numpy.ndarray`
    :param int M: Defines the number M of samples in :math:`\mathcal{D}` used
        to define :math:`\rho_{\mathcal{D},M}`. The choice of M is something
        of an "art"; reasonable results can often be obtained with a
        relatively small number such as 50.
    :param int num_d_emulate: Number of samples used to emulate
        :math:`\rho_{\mathcal{D}}` under a Monte Carlo assumption
    :param Q_ref: :math:`Q(\lambda_{reference})`
    :type Q_ref: :class:`~numpy.ndarray` of size (mdim,)
    :param std: The standard deviation of each QoI
    :type std: :class:`~numpy.ndarray` of size (mdim,)

    :rtype: :class:`~bet.sample.voronoi_sample_set`
    :returns: sample_set object defining simple function approximation

    """
    if Q_ref is None:
        Q_ref = infer_Q(data_set)
    import scipy.stats as stats
    r'''Create M samples defining M bins in D used to define
    :math:`\rho_{\mathcal{D},M}`. rho_D is assumed to be a multivariate
    normal distribution with mean Q_ref and standard deviation std.'''
    Q_ref = check_type(Q_ref, data_set)
    std = check_type(std, data_set)

    covariance = std ** 2

    d_distr_samples = np.zeros((M, len(Q_ref)))
    logging.info("d_distr_samples.shape " + str(d_distr_samples.shape))
    logging.info("Q_ref.shape " + str(Q_ref.shape))
    logging.info("std.shape " + str(std.shape))

    if comm.rank == 0:
        for i in range(len(Q_ref)):
            d_distr_samples[:, i] = np.random.normal(Q_ref[i], std[i], M)
    comm.Bcast([d_distr_samples, MPI.DOUBLE], root=0)

    # Initialize sample set object
    s_set = samp.voronoi_sample_set(len(Q_ref))
    s_set.set_values(d_distr_samples)
    s_set.set_kdtree()

    r'''Now compute probabilities for :math:`\rho_{\mathcal{D},M}` by sampling
    from rho_D. First generate samples of rho_D; this is sometimes called
    emulation.'''
    num_d_emulate_local = int((num_d_emulate / comm.size) +
                              (comm.rank < num_d_emulate % comm.size))
    d_distr_emulate = np.zeros((num_d_emulate_local, len(Q_ref)))
    for i in range(len(Q_ref)):
        d_distr_emulate[:, i] = np.random.normal(Q_ref[i], std[i],
                                                 num_d_emulate_local)

    # Now bin samples of rho_D in the M bins of D to compute rho_{D, M}
    if len(d_distr_samples.shape) == 1:
        d_distr_samples = np.expand_dims(d_distr_samples, axis=1)

    (_, k) = s_set.query(d_distr_emulate)
    count_neighbors = np.zeros((M,), dtype=int)
    volumes = np.zeros((M,))
    for i in range(M):
        Itemp = np.equal(k, i)
        count_neighbors[i] = np.sum(Itemp)
        volumes[i] = np.sum(1.0 / stats.multivariate_normal.pdf(
            d_distr_emulate[Itemp, :], Q_ref, covariance))
    # Now define probability of the d_distr_samples
    # This together with d_distr_samples defines :math:`\rho_{\mathcal{D},M}`
    ccount_neighbors = np.copy(count_neighbors)
    comm.Allreduce([count_neighbors, MPI.INT], [ccount_neighbors, MPI.INT],
                   op=MPI.SUM)
    count_neighbors = ccount_neighbors
    cvolumes = np.copy(volumes)
    comm.Allreduce([volumes, MPI.DOUBLE], [cvolumes, MPI.DOUBLE], op=MPI.SUM)
    volumes = cvolumes
    rho_D_M = count_neighbors.astype(np.float64) * volumes
    rho_D_M = rho_D_M / np.sum(rho_D_M)
    s_set.set_probabilities(rho_D_M)
    s_set.set_volumes(volumes)

    # NOTE: The computation of rho_D_M, d_distr_emulate, and d_distr_samples
    # above, while informed by the sampling of the map Q, does not require
    # solving the model at all, so it can be done "offline".
    if isinstance(data_set, samp.discretization):
        data_set._output_probability_set = s_set
        data_set.set_io_ptr(globalize=False)
    return s_set
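
The volumes computation above is a Monte Carlo volume estimate: for X drawn
from a density p, the expectation of (1/p(X)) * 1_{X in cell_i} equals the
Lebesgue volume of cell_i, so summing 1/pdf over the samples landing in a
cell estimates that cell's volume up to the common factor 1/num_d_emulate,
which the final normalization of rho_D_M absorbs. A standalone sketch of the
estimator on a hypothetical 1D cell:

import numpy as np
from scipy import stats

# X ~ N(0.5, 0.2^2); estimate the volume (length) of the cell [0.3, 0.5).
samples = np.random.normal(0.5, 0.2, 200000)
pdf_vals = stats.norm.pdf(samples, 0.5, 0.2)
in_cell = (samples >= 0.3) & (samples < 0.5)

# (1/N) * sum of 1/pdf over samples in the cell estimates the cell's volume.
vol_est = np.sum(1.0 / pdf_vals[in_cell]) / len(samples)
print(vol_est)   # should be close to 0.2, the length of [0.3, 0.5)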
Code example #7
File: plotP.py Project: yangleicq/BET
def calculate_2D_marginal_probs(sample_set, nbins=20):
    """
    This calculates every pair of marginals (or the joint in the 2D case) of
    the input probability measure on a rectangular grid.
    If the sample_set object is a discretization object, we assume
    that the probabilities to be plotted are from the input space on the
    emulated samples
    (``discretization._emulated_input_sample_set._probabilities_local``).

    This assumes that the user has already run
    :meth:`~bet.calculateP.calculateP.prob_emulated`.


    :param sample_set: Object containing samples and probabilities
    :type sample_set: :class:`~bet.sample.sample_set_base` 
        or :class:`~bet.sample.discretization`
    :param nbins: Number of bins in each direction.
    :type nbins: int or :class:`~numpy.ndarray` of shape (ndim,)
    :rtype: tuple
    :returns: (bins, marginals)

    """
    if isinstance(sample_set, sample.discretization):
        sample_obj = sample_set._emulated_input_sample_set
        if sample_obj is None:
            raise missing_attribute("Missing emulated_input_sample_set")
    elif isinstance(sample_set, sample.sample_set_base):
        sample_obj = sample_set
    else:
        raise bad_object("Improper sample object")

    # Check for local probabilities
    if sample_obj._probabilities_local is None:
        if sample_obj._probabilities is None:
            raise missing_attribute("Missing probabilities")
        else:
            sample_obj.global_to_local()

    if sample_obj.get_dim() < 2:
        raise dim_not_matching("Incompatible dimensions of sample set"
                               " for plotting")

    # Make list of bins if only an integer is given
    if isinstance(nbins, int):
        nbins = nbins * np.ones(sample_obj.get_dim(), dtype=int)

    # Create bins
    bins = []
    for i in range(sample_obj.get_dim()):
        bins.append(
            np.linspace(sample_obj.get_domain()[i][0],
                        sample_obj.get_domain()[i][1], nbins[i] + 1))

    # Calculate marginals
    marginals = {}
    for i in range(sample_obj.get_dim()):
        for j in range(i + 1, sample_obj.get_dim()):
            (marg,
             _) = np.histogramdd(sample_obj.get_values_local()[:, [i, j]],
                                 bins=[bins[i], bins[j]],
                                 weights=sample_obj.get_probabilities_local())
            marg = np.ascontiguousarray(marg)
            marg_temp = np.copy(marg)
            comm.Allreduce([marg, MPI.DOUBLE], [marg_temp, MPI.DOUBLE],
                           op=MPI.SUM)
            marginals[(i, j)] = marg_temp

    return (bins, marginals)
Code example #8
def unif_normal(Q_ref, M, std, num_d_emulate=1E6):
    r"""
    Creates a simple function approximation of :math:`\rho_{\mathcal{D},M}`
    where :math:`\rho_{\mathcal{D},M}` is a multivariate normal probability
    density centered at ``Q_ref`` with standard deviation ``std`` using ``M``
    bins sampled from a uniform distribution with a size of 4 standard
    deviations in each direction.

    :param int M: Defines the number M of samples in :math:`\mathcal{D}` used
        to define :math:`\rho_{\mathcal{D},M}`. The choice of M is something
        of an "art"; reasonable results can often be obtained with a
        relatively small number such as 50.
    :param int num_d_emulate: Number of samples used to emulate
        :math:`\rho_{\mathcal{D}}` under a Monte Carlo assumption
    :param Q_ref: :math:`Q(\lambda_{reference})`
    :type Q_ref: :class:`~numpy.ndarray` of size (mdim,)
    :param std: The standard deviation of each QoI
    :type std: :class:`~numpy.ndarray` of size (mdim,)
    :rtype: tuple
    :returns: (rho_D_M, d_distr_samples, d_Tree) where ``rho_D_M`` is (M,),
        ``d_distr_samples`` is (M, mdim) :class:`~numpy.ndarray`, and
        ``d_Tree`` is the :class:`~scipy.spatial.KDTree` for d_distr_samples

    """
    r'''Create M samples defining M bins in D used to define
    :math:`\rho_{\mathcal{D},M}`. rho_D is assumed to be a multivariate
    normal distribution with mean Q_ref and standard deviation std.'''

    bin_size = 4.0*std
    d_distr_samples = np.zeros((M, len(Q_ref)))
    if comm.rank == 0:
        d_distr_samples = bin_size*(np.random.random((M, 
            len(Q_ref)))-0.5)+Q_ref
    comm.Bcast([d_distr_samples, MPI.DOUBLE], root=0)

 
    r'''Now compute probabilities for :math:`\rho_{\mathcal{D},M}` by sampling
    from rho_D. First generate samples of rho_D; this is sometimes called
    emulation.'''
    num_d_emulate = int(num_d_emulate/comm.size)+1
    d_distr_emulate = np.zeros((num_d_emulate, len(Q_ref)))
    for i in range(len(Q_ref)):
        d_distr_emulate[:, i] = np.random.normal(Q_ref[i], std[i], 
                num_d_emulate) 

    # Now bin samples of rho_D in the M bins of D to compute rho_{D, M}
    if len(d_distr_samples.shape) == 1:
        d_distr_samples = np.expand_dims(d_distr_samples, axis=1)

    d_Tree = spatial.KDTree(d_distr_samples)
    (_, k) = d_Tree.query(d_distr_emulate)
    count_neighbors = np.zeros((M,), dtype=int)
    #volumes = np.zeros((M,))
    for i in range(M):
        Itemp = np.equal(k, i)
        count_neighbors[i] = np.sum(Itemp)
        
    r'''Now define the probability of the d_distr_samples. This together with
    d_distr_samples defines :math:`\rho_{\mathcal{D},M}`.'''
    ccount_neighbors = np.copy(count_neighbors)
    comm.Allreduce([count_neighbors, MPI.INT], [ccount_neighbors, MPI.INT],
            op=MPI.SUM) 
    count_neighbors = ccount_neighbors
    rho_D_M = count_neighbors.astype(np.float64)/float(comm.size*num_d_emulate)
    
    # NOTE: The computation of rho_D_M, d_distr_emulate, and d_distr_samples
    # above, while informed by the sampling of the map Q, does not require
    # solving the model at all, so it can be done "offline".
    return (rho_D_M, d_distr_samples, d_Tree)
Code example #9
def unif_unif(data, Q_ref, M=50, bin_ratio=0.2, num_d_emulate=1E6):
    r"""
    Creates a simple function approximation of :math:`\rho_{\mathcal{D}}`
    where :math:`\rho_{\mathcal{D}}` is a uniform probability density on
    a generalized rectangle centered at Q_ref.
    The support of this density is defined by bin_ratio, which determines
    the size of the generalized rectangle by scaling the circumscribing 
    generalized rectangle of :math:`\mathcal{D}`.
    The simple function approximation is then defined by determining M 
    Voronoi cells (i.e., "bins") partitioning :math:`\mathcal{D}`. These
    bins are only implicitly defined by M samples in :math:`\mathcal{D}`.
    Finally, the probability of each of these bins is computed by
    sampling from :math:`\rho_{\mathcal{D}}` and using nearest neighbor
    searches to bin these samples in the M implicitly defined bins. 
    The result is the simple function approximation denoted by
    :math:`\rho_{\mathcal{D},M}`.
    
    Note that all computations in the measure-theoretic framework that
    follow from this are for the fixed simple function approximation
    :math:`\rho_{\mathcal{D},M}`.

    :param int M: Defines the number M of samples in :math:`\mathcal{D}` used
        to define :math:`\rho_{\mathcal{D},M}`. The choice of M is something
        of an "art"; reasonable results can often be obtained with a
        relatively small number such as 50.
    :param bin_ratio: The ratio used to determine the width of the
        uniform distribution as ``bin_size = (data_max - data_min) * bin_ratio``
    :type bin_ratio: double or list
    :param int num_d_emulate: Number of samples used to emulate
        :math:`\rho_{\mathcal{D}}` under a Monte Carlo assumption
    :param data: Array containing QoI data where the QoI is mdim
        dimensional
    :type data: :class:`~numpy.ndarray` of size (num_samples, mdim)
    :param Q_ref: :math:`Q(\lambda_{reference})`
    :type Q_ref: :class:`~numpy.ndarray` of size (mdim,)
    :rtype: tuple
    :returns: (rho_D_M, d_distr_samples, d_Tree) where ``rho_D_M`` is (M,),
        ``d_distr_samples`` is (M, mdim) :class:`~numpy.ndarray`, and
        ``d_Tree`` is the :class:`~scipy.spatial.KDTree` for d_distr_samples
    """
    data = util.fix_dimensions_data(data)
    bin_size = (np.max(data, 0) - np.min(data, 0))*bin_ratio


    r'''
    Create M samples defining M Voronoi cells (i.e., "bins") in D used to 
    define the simple function approximation :math:`\rho_{\mathcal{D},M}`.
    
    This does not have to be random, but here we assume this to be the case.
    We can choose these samples deterministically but that fails to scale with
    dimension efficiently.
    
    Note that these M samples are chosen for the sole purpose of determining
    the bins used to create the approximation to :math:`\rho_{\mathcal{D}}`.
    
    We call these M samples "d_distr_samples" because they are samples on the
    data space and the distr implies these samples are chosen to create the
    approximation to the probability measure (distribution) on D.
    
    Note that we create these samples in a set containing the hyperrectangle in
    order to get output cells with zero probability. If all of the
    d_distr_samples were taken from within the support of
    :math:`\rho_{\mathcal{D}}` then each of the M bins would have positive
    probability. This would in turn imply that the support of
    :math:`\rho_{\Lambda}` is all of :math:`\Lambda`.
    '''
    if comm.rank == 0:
        d_distr_samples = 1.5*bin_size*(np.random.random((M,
            data.shape[1]))-0.5)+Q_ref 
    else:
        d_distr_samples = np.empty((M, data.shape[1]))
    comm.Bcast([d_distr_samples, MPI.DOUBLE], root=0)

    r'''
    Compute probabilities in the M bins used to define
    :math:`\rho_{\mathcal{D},M}` by Monte Carlo approximations
    that in this context amount to binning with nearest neighbor
    approximations the num_d_emulate samples taken from
    :math:`\rho_{\mathcal{D}}`.
    '''
    # Generate the samples from :math:`\rho_{\mathcal{D}}`
    num_d_emulate = int(num_d_emulate/comm.size)+1
    d_distr_emulate = bin_size*(np.random.random((num_d_emulate,
        data.shape[1]))-0.5) + Q_ref

    # Bin these samples using nearest neighbor searches
    d_Tree = spatial.KDTree(d_distr_samples)
    (_, k) = d_Tree.query(d_distr_emulate)
    count_neighbors = np.zeros((M,), dtype=int)
    for i in range(M):
        count_neighbors[i] = np.sum(np.equal(k, i))


    # Use the binning to define :math:`\rho_{\mathcal{D},M}`
    ccount_neighbors = np.copy(count_neighbors)
    comm.Allreduce([count_neighbors, MPI.INT], [ccount_neighbors, MPI.INT],
            op=MPI.SUM)
    count_neighbors = ccount_neighbors
    rho_D_M = count_neighbors.astype(np.float64) / float(num_d_emulate*comm.size)

    '''
    NOTE: The computation of rho_D_M, d_distr_emulate, and d_distr_samples
    above, while possibly informed by the sampling of the map Q, does not
    require solving the model at all, so it can be done "offline". The
    results can then be stored and accessed later by the algorithm using a
    completely different set of parameter samples and model solves.
    '''
    return (rho_D_M, d_distr_samples, d_Tree)
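
A usage sketch (hypothetical data; it assumes this module's imports, i.e.
util, spatial, comm, and MPI, are in scope on a single MPI rank):

import numpy as np

# Synthetic QoI data in two dimensions and a reference value inside its range.
data = np.random.random((500, 2))
Q_ref = np.array([0.5, 0.5])

(rho_D_M, d_distr_samples, d_Tree) = unif_unif(data, Q_ref, M=50,
                                               bin_ratio=0.2,
                                               num_d_emulate=1E5)
print(rho_D_M.shape, rho_D_M.sum())   # (50,) and approximately 1.0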