def test_roots_hermite():
    rootf = sc.roots_hermite
    evalf = sc.eval_hermite
    weightf = orth.hermite(5).weight_func

    verify_gauss_quad(rootf, evalf, weightf, -np.inf, np.inf, 5)
    verify_gauss_quad(rootf, evalf, weightf, -np.inf, np.inf, 25, atol=1e-13)
    verify_gauss_quad(rootf, evalf, weightf, -np.inf, np.inf, 100, atol=1e-12)

    # Golub-Welsch branch
    x, w = sc.roots_hermite(5, False)
    y, v, m = sc.roots_hermite(5, True)
    assert_allclose(x, y, 1e-14, 1e-14)
    assert_allclose(w, v, 1e-14, 1e-14)

    muI, muI_err = integrate.quad(weightf, -np.inf, np.inf)
    assert_allclose(m, muI, rtol=muI_err)

    # Asymptotic branch (switch over at n >= 150)
    x, w = sc.roots_hermite(200, False)
    y, v, m = sc.roots_hermite(200, True)
    assert_allclose(x, y, 1e-14, 1e-14)
    assert_allclose(w, v, 1e-14, 1e-14)
    assert_allclose(sum(v), m, 1e-14, 1e-14)

    assert_raises(ValueError, sc.roots_hermite, 0)
    assert_raises(ValueError, sc.roots_hermite, 3.3)
Example #3
# imports needed by this snippet
from numpy import sqrt
from scipy.special import roots_hermite


def gauss_hermite_quadrature(n, mu=None, sigma=None):
    """
    Compute int f(x) exp(-x^2) dx = sum_{i=1}^{n} w[i] * f(x[i])
    using Gauss-Hermite quadrature.

    Parameters
    ----------
    n : int
        Number of quadrature nodes.
    mu : float, optional
        Mean used in the change of variables y = sqrt(2) * sigma * x + mu.
        The default is None (no change of variables).
    sigma : float, optional
        Standard deviation used in the change of variables; only used when
        mu is given. The default is None.

    Returns
    -------
    x : ndarray
        Quadrature nodes, shifted and scaled to y = sqrt(2) * sigma * x + mu
        when mu is given.
    w : ndarray
        Gauss-Hermite quadrature weights; returned unscaled, so any
        normalization from the change of variables is left to the caller.

    """
    x, w = roots_hermite(n)
    if mu is not None:
        y = sqrt(2.) * sigma * x + mu

        return y, w
    else:
        return x, w
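
A brief usage sketch (added for illustration, not part of the original snippet): estimating E[f(X)] for X ~ N(mu, sigma^2). The pi**(-0.5) factor from the change of variables is applied by the caller here, since the function returns the unscaled Gauss-Hermite weights.

import numpy as np

mu, sigma = 1.5, 0.7
y, w = gauss_hermite_quadrature(20, mu=mu, sigma=sigma)
estimate = np.sum(w * y**2) / np.sqrt(np.pi)   # approximates E[X^2]
exact = mu**2 + sigma**2
assert abs(estimate - exact) < 1e-10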
Example #4
def gauss_hermite(n):
    '''
    Gauss-Hermite quadrature:

    A rule that is exact for polynomials of degree up to 2*n - 1 on the line,
    with respect to the weight function w(x) = exp(-x**2).
    '''
    return special.roots_hermite(n)
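
A quick check (added for illustration) of the exactness claim in the docstring: with n = 3 nodes the rule integrates polynomials up to degree 2*n - 1 = 5 exactly against exp(-x**2), e.g. int x^4 exp(-x^2) dx = 3*sqrt(pi)/4. It assumes the same numpy/scipy imports the snippet itself relies on.

import numpy as np
x, w = gauss_hermite(3)
assert np.isclose(np.sum(w * x**4), 3 * np.sqrt(np.pi) / 4)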
def get_hfactor_expectation_coefs_points(factor, K, T, dtype='float64'):
    """
    Get the coefficients and evaluation points needed for computing expectation of a scalar-valued function over the
    hybrid factor (for all K mixture components simultaneously)
    :param factor:
    :param T: number of quad points (only needed if factor contains cont node); for now assume the same for all cnodes
    :return: (coefs, axes): coefs, axes are lists of matricies, of shapes [[K, V1], [K, V2], ..., [K, Vn]], where n
    is the number of nodes in the factor, Vi = T if node i is cont, or is the number of states if node i is discrete;
    to compute expectation w.r.t. the kth component (fully factorized) distribution, define the tensor of coefficients
    C_k := \bigotimes_{i=1}^n coefs[i][k, :], the tensor of evaluation points E_k = \bigotimes_{i=1}^n axes[i][k, :],
    then the kth expectation is \langle vec(C_k), vec(f(E_k)) \rangle; the total expectation w.r.t. the mixture is
    obtained by taking a weighted mixture (by w_k) of K component expectations.
    The list of the kth rows of axes mats gives the axes needed to construct evaluation grid for computing the kth
    component-wise expectation
    """
    coefs = []
    axes = []
    assert factor.domain_type in ('c', 'h'), \
        'Must input continuous/hybrid factor; use dfactor_bfe_obj directly for discrete factor for better performance'
    # compute GHQ once (same for all cnodes) if factor is cont/hybrid
    ghq_points, ghq_weights = roots_hermite(T)  # assuming Gaussian for now
    ghq_coef = (np.pi)**(-0.5)  # from change-of-var
    ghq_weights = ghq_coef * ghq_weights  # let's fold ghq_coef into the quadrature weights, so no need to worry about it later
    # ghq_weights_KT = np.tile(np.reshape(ghq_weights, [1, -1]), [K, 1])  # K x T (repeat for K identical rows)
    ghq_weights = tf.constant(ghq_weights, dtype=dtype)
    ghq_weights_KT = tf.tile(tf.reshape(ghq_weights, [1, -1]),
                             [K, 1])  # K x T (repeat for K identical rows)

    for rv in factor.nb:
        if rv.domain_type[0] == 'd':  # discrete
            c = rv.belief_params_[
                'pi']  # K x dstates[i] matrix (tf); will be put under stop_gradient later
            a = np.tile(np.reshape(rv.values, [1, -1]),
                        [K, 1])  # K x dstates[i] (last dimension repeated)
            a = tf.constant(
                a, dtype=dtype
            )  # otherwise tf complains about multiplying int tensor with float tensor
        elif rv.domain_type[0] == 'c':  # cont, assuming Gaussian for now
            c = ghq_weights_KT
            mean_K1 = rv.belief_params_['mu_K1']
            var_K1 = rv.belief_params_['var_K1']
            a = (2 * var_K1)**0.5 * ghq_points + mean_K1  # K x T
            a = tf.stop_gradient(
                a)  # don't want to differentiate w.r.t. evaluation points
        else:
            raise NotImplementedError
        coefs.append(c)
        axes.append(a)

    return coefs, axes
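
A minimal numpy sketch (added for illustration; not in the source, where coefs/axes are tensorflow tensors) of how the returned coefs/axes would be combined for one mixture component k of a two-node factor, following the formula in the docstring; f is a hypothetical scalar function of two arguments.

import numpy as np

def component_expectation_2node(coefs_k, axes_k, f):
    # coefs_k / axes_k: per-node 1-D arrays for component k, i.e. coefs[i][k, :] and axes[i][k, :]
    C_k = np.einsum('i,j->ij', coefs_k[0], coefs_k[1])           # V1 x V2 coefficient grid
    Xi, Xj = np.meshgrid(axes_k[0], axes_k[1], indexing='ij')    # V1 x V2 evaluation grid E_k
    return np.sum(C_k * f(Xi, Xj))                               # <vec(C_k), vec(f(E_k))>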
def crvs_bfe_obj(rvs, T, w, Mu, Var, rvs_counts=None):
    """
    Get the contribution to the BFE from multiple cont (Gaussian) rvs
    :param rvs: rvs: list of cont rvs; must "line up with" params Mu and Var; i.e., Mu[i] and Var[i] give belief params
    for rvs[i]
    :param T:
    :param w:
    :param Mu:
    :param Var:
    :param rvs_counts: an iterable of non-negative ints, such that the bfe contribution from rvs[i] will be multiplied
    by rvs_counts[i]; by default the contribution from each rv is only counted once
    :return:
    """
    [N, K] = Mu.shape
    w_1K1 = w[None, :, None]

    ghq_points, ghq_weights = roots_hermite(T)  # assuming Gaussian for now
    ghq_coef = (np.pi)**(-0.5)  # from change-of-var
    ghq_weights *= ghq_coef
    QY = ghq_points * (2 * tf.reshape(Var, [N, K, 1]))**0.5 + tf.reshape(
        Mu, [N, K, 1])  # N x K x T; all eval points
    QY = tf.stop_gradient(QY)  # don't want to differentiate w.r.t. quad points

    log_belief = tf.log(eval_crvs_belief(QY, w, Mu, Var))  # N x K x T
    prod = tf.stop_gradient(
        w_1K1 * ghq_weights *
        log_belief)  # N x K x T; weighted component-wise Hadamard products

    num_nbrs = np.array([len(rv.nb) for rv in rvs])
    if rvs_counts is None:
        rvs_counts = np.ones(len(rvs), dtype='int')
    else:
        rvs_counts = np.array(rvs_counts).astype('int')
    expect_coefs = rvs_counts * (1 - num_nbrs)

    bfe = tf.reduce_sum(expect_coefs * tf.reduce_sum(prod, axis=[1, 2]))
    aux_obj = tf.reduce_sum(expect_coefs *
                            tf.reduce_sum(prod * log_belief, axis=[1, 2]))

    return bfe, aux_obj
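
A standalone numpy check (added for illustration) of the quadrature identity used above, for a single Gaussian (K = 1): with eval points sqrt(2*var)*x + mu and the pi**(-0.5) coefficient folded into the weights, E_q[log q] recovers its closed form -0.5*log(2*pi*e*var).

import numpy as np
from scipy.special import roots_hermite

mu, var, T = 0.3, 2.0, 30
x, w = roots_hermite(T)
y = np.sqrt(2 * var) * x + mu                    # eval points, as in QY above
log_q = -0.5 * np.log(2 * np.pi * var) - 0.5 * (y - mu)**2 / var
estimate = np.pi**(-0.5) * np.sum(w * log_q)
exact = -0.5 * np.log(2 * np.pi * np.e * var)
assert abs(estimate - exact) < 1e-10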
Example #7
def create_mapping_of_nodes_and_weights(number_of_nodes):
    from scipy.special import roots_hermite
    nodes, weights = roots_hermite(number_of_nodes+1)
    return {'nodes': nodes, 'weights': weights}
Example #8
    dim) * wi  # maximum search length. Default: diagonal of search domain
tau = 0.9  # contraction rate. Default: 0.9
num_ls = int(
    (GH_pts - 1) * dim * 0.05
)  # number of points used for LS. Default: 5% of function evaluations used for computing DGS gradient
# if GH_pts is even, num_ls = int(GH_pts*dim*0.05)

pw = 5 * wi  # initial radius. Default: 5 * width

eps = 1e-3  # tolerance of relative update for resetting radius. Default: 1e-3
res_step = 10  # minimum number of step between radius reset. Default: 10

#----------------------------------------------------------
# GH values and weights
#----------------------------------------------------------
gh = roots_hermite(GH_pts)
gh_value = np.expand_dims(gh[0], axis=1)
gh_weight = gh[1]

# GH point matrix
gh_value_mat = np.zeros((GH_pts * dim, dim))
gh_value_vec = np.zeros(GH_pts * dim)

for i in range(dim):
    gh_value_mat[GH_pts * i:GH_pts * (i + 1), i] = gh[0]
    gh_value_vec[GH_pts * i:GH_pts * (i + 1)] = gh[0]
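
A small structural illustration (added; not in the original source): with dim = 2 and GH_pts = 4, the loop above places the Gauss-Hermite nodes block-diagonally, one coordinate direction per column.

import numpy as np
from scipy.special import roots_hermite

_nodes, _ = roots_hermite(4)
_mat = np.zeros((4 * 2, 2))
for _i in range(2):
    _mat[4 * _i:4 * (_i + 1), _i] = _nodes
assert np.allclose(_mat[:4, 0], _nodes) and np.allclose(_mat[4:, 1], _nodes)
assert np.count_nonzero(_mat) == 8   # every other entry stays zero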

#########################################################
# MAIN LOOP
#########################################################
def gauss_hermite(fun, n, args=()):
    """Gauss-Hermite quadrature of fun(x, *args) against the weight exp(-x**2)."""
    # default args=() (rather than None) so that fun(xi, *args) also works when no extra args are given
    xi, wi = roots_hermite(n)
    return np.sum(fun(xi, *args) * wi)
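
Example use (added for illustration), assuming numpy and scipy.special.roots_hermite are imported as in the snippet: int (x - a)**2 * exp(-x**2) dx = sqrt(pi)/2 for a = 0.

import numpy as np
assert np.isclose(gauss_hermite(lambda x, a: (x - a)**2, 10, args=(0.0,)), np.sqrt(np.pi) / 2)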
def get_quad_bfe(g, w, Mu, Sigs, T, node_lpot, edge_lpot):
    """
    Get the symbolic tensorflow objective for optimizing BFE with quadrature approximation for the integrals.
    :param g: graph; for now assuming all its continuous nodes are modeled by diag gaussian mixtures
    # :param nodes: length N list/set of node ids that are modeled by diag gaussian mixtures
    :param Mu: N x K tensor of diagonal gaussian mixture nodes means
    :param Sigs: N x K tensor of diagonal gaussian mixture nodes variances
    :param T: num quad points
    :return: bfe, aux_obj; aux_obj is the actual (minimization) objective that tensorflow does auto-diff w.r.t.
    (tf can't directly differentiate thru expectations in the bfe)
    """
    from scipy.special import roots_hermite
    [N, K] = Mu.shape
    assert g.Nc == N  # TODO: no longer assume all continuous nodes are gm; allow specifying a subset of nodes
    num_cedges = len(g.Ec)
    dtype = Mu.dtype

    bfe = 0
    aux_obj = 0

    w_col = tf.reshape(w, [K, 1])

    qx_np, qw_np = roots_hermite(T)
    qx = tf.constant(qx_np, dtype=dtype)
    qw = tf.constant(qw_np, dtype=dtype)
    qw_outer = tf.constant(np.outer(qw_np, qw_np))  # TxT

    integral_coef = (np.pi)**(-0.5)

    QY = qx * (2 * tf.reshape(Sigs, [N, K, 1]))**0.5 + tf.reshape(
        Mu, [N, K, 1])  # N x K x T
    QY = tf.stop_gradient(
        QY)  # don't want to differentiate w.r.t. quadrature points

    # all nodes
    num_nbrs = np.array([len(g.adj[n]) for n in g.Vc])
    num_nbrs = num_nbrs.reshape([N, 1, 1])
    node_log_belief = tf.log(node_belief(QY, w, Mu, Sigs))  # N x K x T
    F = node_lpot('c', QY) - (1 - num_nbrs) * node_log_belief  # N x K x T

    grals = integral_coef * tf.reduce_sum(qw * F, 2)  # Nc x K
    bfe += tf.reduce_sum(grals @ w_col)

    grals = integral_coef * tf.reduce_sum(
        qw * tf.stop_gradient(F) * node_log_belief, 2)  # treating F as const
    aux_obj += tf.reduce_sum(grals @ w_col)

    # all edges
    cedge_i = np.array([g.Vc_idx[n]
                        for n in g.Ec[:, 0]])  # 1 x num_cedges; from
    cedge_j = np.array([g.Vc_idx[n] for n in g.Ec[:, 1]])  # 1 x num_cedges; to
    QYi = tf.gather(QY, cedge_i)  # num_cedges x K x T
    QYYi = tf.zeros([num_cedges, K, T, T], dtype=Mu.dtype) \
           + tf.reshape(QYi, [num_cedges, K, T, 1])  # hack, since there's no tf.repeat
    QYj = tf.gather(QY, cedge_j)  # num_cedges x K x T
    QYYj = tf.zeros([num_cedges, K, T, T], dtype=dtype) \
           + tf.reshape(QYj, [num_cedges, K, 1, T])  # hack, since there's no tf.repeat
    cedge_log_belief = tf.log(
        edge_belief(QYYi, QYYj, w, tf.gather(Mu, cedge_i),
                    tf.gather(Mu, cedge_j), tf.gather(Sigs, cedge_i),
                    tf.gather(Sigs, cedge_j)))
    F = edge_lpot('c', 'c', QYYi, QYYj) - cedge_log_belief

    inner_prods = tf.reduce_sum(qw_outer * F, axis=[2, 3])  # num_cedges x K
    bfe += integral_coef**2 * tf.reduce_sum(inner_prods @ w_col)

    inner_prods = tf.reduce_sum(qw_outer * tf.stop_gradient(F) *
                                cedge_log_belief,
                                axis=[2, 3])  # treating F as const
    aux_obj += integral_coef**2 * tf.reduce_sum(inner_prods @ w_col)

    return bfe, aux_obj
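
A numpy check (added for illustration) of the pairwise quadrature used for the edge terms above: a double integral against two independent Gaussians is approximated with the outer product of the 1-D weights and the pi**(-1) coefficient (integral_coef**2).

import numpy as np
from scipy.special import roots_hermite

mu1, var1, mu2, var2, T = 0.5, 1.2, -1.0, 0.4, 20
x, w = roots_hermite(T)
y1 = np.sqrt(2 * var1) * x + mu1                 # T eval points for Y1
y2 = np.sqrt(2 * var2) * x + mu2                 # T eval points for Y2
F = (y1[:, None] - y2[None, :])**2               # T x T grid, playing the role of the edge integrand F
estimate = np.pi**(-1) * np.sum(np.outer(w, w) * F)
exact = var1 + var2 + (mu1 - mu2)**2             # E[(Y1 - Y2)^2] for independent Y1, Y2
assert abs(estimate - exact) < 1e-10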
Example #11
def hfactors_bfe_obj(factors, T, w, dtype='float64', neg_lpot_only=False):
    """
    Get the contribution to the BFE from multiple hybrid (or continuous) factors that have the same types of neighboring
    rvs.
    :param factors: length C list of factor objects that have the same nb_domain_type.
    :param T:
    :param w:
    :param dtype: float type to use
    :param neg_lpot_only: if False (default), compute E_b[-log pot + log b] as in BFE;
    if True, only compute E_b[-log pot] (with no log belief in the expectant), to be used with neg ELBO (for NPVI)
    :return:
    """
    # group factors with the same types of log potentials together for efficient evaluation later
    factors_with_unique_log_potential_fun_types, unique_log_potential_fun_types = \
        utils.get_unique_subsets(factors, key=lambda f: type(f.log_potential_fun))
    factors = sum(factors_with_unique_log_potential_fun_types,
                  [])  # join together into flat list

    K = np.prod(w.shape)
    C = len(factors)
    factor = factors[0]
    n = len(factor.nb)

    ghq_points, ghq_weights = roots_hermite(T)  # assuming Gaussian for now
    ghq_coef = (np.pi)**(-0.5)  # from change-of-var
    ghq_weights = ghq_coef * ghq_weights  # let's fold ghq_coef into the quadrature weights, so no need to worry about it later
    ghq_weights = tf.constant(ghq_weights, dtype=dtype)
    ghq_weights_CKT = tf.tile(tf.reshape(ghq_weights, [1, 1, T]),
                              [C, K, 1])  # C x K x T

    coefs = [None] * n  # will be [[C, K, V1], [C, K, V2], ..., [C, K, Vn]]
    axes = [None] * n  # will be [[C, K, V1], [C, K, V2], ..., [C, K, Vn]]

    comp_probs = []  # for evaluating beliefs along the way
    for i, domain_type in enumerate(factor.nb_domain_types):
        factors_ith_nb = [
            factor.nb[i] for factor in factors
        ]  # the ith neighbor (rv in clique) across all factors
        if domain_type[0] == 'd':
            rv = factor.nb[i]
            c = tf.stack(
                [rv.belief_params_['pi'] for rv in factors_ith_nb], axis=0
            )  # C x K x Vi, where Vi is the number of dstates of factor.nb[i]

            coefs[
                i] = c  # the prob params are exactly the inner-prod coefficients in expectations
            a = np.tile(
                np.reshape(rv.values, [1, 1, -1]),
                [C, K, 1])  # C x K x dstates[i] (last dimension repeated)
            a = tf.constant(
                a, dtype=dtype
            )  # otherwise tf complains about multiplying int tensor with float tensor
            axes[i] = a

            # eval_hfactors_belief
            # comp_prob = tf.stack([rv.belief_params_['pi'] for rv in factors_ith_nb],
            #                      axis=1)  # K x C x Vi, where Vi is the number of dstates of factor.nb[i]
            comp_prob = tf.transpose(c, [1, 0, 2])  # K x C x Vi
            comp_prob = comp_prob[:, :, None, :]  # K x C x 1 x Vi
            comp_prob = tf.tile(
                comp_prob,
                [1, 1, K, 1])  # K x C x M(=K) x Vi; same for all M(=K) axes
        elif domain_type[0] == 'c':
            Mu_CK = tf.stack(
                [rv.belief_params_['mu'] for rv in factors_ith_nb],
                axis=0)  # C x K
            Var_CK = tf.stack(
                [rv.belief_params_['var'] for rv in factors_ith_nb],
                axis=0)  # C x K
            coefs[i] = ghq_weights_CKT
            a = (2 * Var_CK[:, :, None]
                 )**0.5 * ghq_points + Mu_CK[:, :, None]  # C x K x T
            a = tf.stop_gradient(
                a)  # don't want to differentiate w.r.t. evaluation points
            axes[i] = a

            # eval_hfactors_belief
            Mu_KC11 = tf.transpose(Mu_CK)[:, :, None, None]  # K x C x 1 x 1
            Var_inv_KC11 = tf.stack(
                [rv.belief_params_['var_inv_K1'] for rv in factors_ith_nb],
                axis=1)[:, :, None]
            # eval pdf of axes[i] under all K scalar comps of ith nodes in all the cliques; result is K x C x M(=K) x Vi
            comp_prob = (2 * np.pi) ** (-0.5) * tf.sqrt(Var_inv_KC11) * \
                        tf.exp(-0.5 * (axes[i] - Mu_KC11) ** 2 * Var_inv_KC11)
        else:
            raise NotImplementedError
        comp_probs.append(comp_prob)

    # eval_hfactors_belief
    # multiply all dimensions together, then weigh by w
    einsum_eq = utils.outer_prod_einsum_equation(len(factor.nb),
                                                 common_first_ndims=3)
    joint_comp_probs = tf.einsum(einsum_eq,
                                 *comp_probs)  # K x C x M x V1 x V2 x ... Vn
    w_broadcast = tf.reshape(w, [K] + [1] * (len(factor.nb) + 2))
    belief = tf.reduce_sum(w_broadcast * joint_comp_probs,
                           axis=0)  # C x M x V1 x V2 x ... Vn
    # above replaces the call belief = eval_hfactors_belief(factors, axes, w)  # C x K x V1 x V2 x ... Vn

    einsum_eq = utils.outer_prod_einsum_equation(n, common_first_ndims=2)
    coefs = tf.einsum(
        einsum_eq,
        *coefs)  # C x K x V1 x V2 x ... Vn; C x K grids of Hadamard products

    lpot = group_eval_log_potential_funs(
        factors_with_unique_log_potential_fun_types,
        unique_log_potential_fun_types, axes)  # C x K x V1 x V2 x ... Vn
    log_belief = tf.log(belief)
    if neg_lpot_only:
        F = -lpot
    else:
        F = -lpot + log_belief
    w_broadcast = tf.reshape(w, [-1] + [1] * n)  # K x 1 x 1 ... x 1
    prod = tf.stop_gradient(
        w_broadcast * coefs *
        F)  # weighted component-wise Hadamard products for C x K expectations
    factors_bfes = tf.reduce_sum(prod, axis=list(range(
        1, n + 2)))  # reduce the last (n+1) dimensions
    factors_aux_objs = tf.reduce_sum(
        prod * log_belief,
        axis=list(range(1, n + 2)))  # reduce the last (n+1) dimensions

    sharing_counts = np.array([factor.sharing_count for factor in factors],
                              dtype='float')
    bfe = tf.reduce_sum(sharing_counts * factors_bfes)
    aux_obj = tf.reduce_sum(sharing_counts * factors_aux_objs)

    return bfe, aux_obj
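
A small numpy sketch (added for illustration) of the kind of einsum produced above; the exact equation string comes from utils.outer_prod_einsum_equation, which is not shown here, so this assumes a two-node factor with the C, K leading dimensions shared.

import numpy as np

c1 = np.random.rand(4, 5, 2)                     # C x K x V1
c2 = np.random.rand(4, 5, 3)                     # C x K x V2
grid = np.einsum('abi,abj->abij', c1, c2)        # C x K x V1 x V2 grid of outer products
assert np.allclose(grid[1, 2], np.outer(c1[1, 2], c2[1, 2]))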
Example #13
def hermiteRoots(self, n):
    return special.roots_hermite(n)