def compute_logp_independent_block_mc(X, alpha=None, iterations=1e5):
    """Monte Carlo estimate of the log likelihood of a count matrix
    under the assumption of independence.

    Every row of X is modelled as one Multinomial draw, and all rows
    share the same probability vector theta (one entry per column of
    X). theta is sampled `iterations` times from Dirichlet(alpha) and
    the per-sample log likelihoods are combined with `logmean`.

    Parameters
    ----------
    X : 2D array of counts, one Multinomial observation per row.
    alpha : Dirichlet parameters, one per column of X. Defaults to a
        vector of ones of length X.shape[1].
    iterations : number of Dirichlet samples; converted with int().

    Returns
    -------
    Whatever `logmean` returns for the vector of per-sample log
    likelihoods (`logmean` is defined outside this block; presumably
    the log of the mean of the exponentiated values -- TODO confirm).
    """
    X = np.asarray(X)
    if alpha is None:
        alpha = np.ones(X.shape[1])

    # One theta sample per row: shape (iterations, number of columns of X).
    Theta = dirichlet(alpha, size=int(iterations))

    # Multinomial coefficients, one per row of X; they do not depend
    # on theta, so they are computed once.
    log_coefficients = gammaln(X.sum(1) + 1).sum() - gammaln(X + 1).sum()

    # sum_i sum_j X[i, j] * log(theta[j]) only needs the column totals
    # of X, so a matrix-vector product gives one value per theta sample
    # without building a (rows, columns, iterations) temporary, and it
    # pairs each theta entry with its own column whatever the shape of X.
    logp_ibs = log_coefficients + np.dot(np.log(Theta), X.sum(0))
    return logmean(logp_ibs)
def log_multivariate_polya_mc(X, alpha, iterations=1e5):
    """Monte Carlo estimate of the log likelihood of the multivariate
    Polya distribution, also known as the Dirichlet compound
    multinomial (DCM).

    `iterations` probability vectors are sampled from Dirichlet(alpha);
    for each of them the Multinomial log likelihood of the counts X is
    evaluated, and the resulting vector is reduced with `logmean`
    (defined outside this block).
    """
    n_samples = int(iterations)
    theta_samples = dirichlet(alpha, size=n_samples)

    # Multinomial coefficient of X: identical for every theta sample.
    log_coefficient = gammaln(X.sum() + 1) - gammaln(X + 1).sum()
    # Theta-dependent part, one value per sampled probability vector.
    log_kernel = (X * np.log(theta_samples)).sum(1)

    return logmean(log_coefficient + log_kernel)
# Example no. 3
# 0
def estimate_radius(treeB, A, k, subset_size=1000):
    """Estimate the radius r such that Tree.query_radius(A, r) returns
    approximately k neighbors per point.

    At most `subset_size` randomly chosen rows of A are queried for
    their k nearest neighbors in treeB. The distance to the farthest
    returned neighbor of each query gives a local density estimate;
    the densities are averaged with `logmean` (defined outside this
    block) and the average is inverted into a radius.

    The estimated radius is printed and returned.
    """
    n_dims = A.shape[1]
    n_queries = A.shape[0]
    if n_queries > subset_size:
        # Random subsample of the query points to bound the cost.
        chosen = np.random.permutation(n_queries)[:subset_size]
        A = A[chosen, :]

    distances, _ = tree_parallel_query(treeB, A, k)
    farthest = distances[:, -1]

    # The volume of a D-dimensional sphere of radius R is
    #   V = R^D * pi^(D/2) / gamma(D/2 + 1),
    # so the density of neighbors inside it is (number of neighbors) / V.
    # Everything is kept in log space to avoid numerical issues; the
    # term below is minus the log volume of the unit sphere.
    neg_log_unit_volume = gammaln(n_dims / 2.0 + 1.0) - n_dims / 2.0 * np.log(np.pi)
    log_densities = np.log(distances.shape[1]) + neg_log_unit_volume - n_dims * np.log(farthest)
    log_density_mean = logmean(log_densities)

    # Solve k / V(R) = mean density for R: the expected radius that
    # holds k neighbors.
    log_r_mean = 1.0 / n_dims * (np.log(k) + neg_log_unit_volume - log_density_mean)
    r_mean = np.exp(log_r_mean)
    print("Estimated radius to get %s neighbors on average: %s" % (k, r_mean))
    return r_mean