def compute_logp_independent_block_mc(X, alpha=None, iterations=1e5):
    """Monte Carlo estimate of the log-likelihood of a count matrix under
    the assumption of independence.

    Every row of X is treated as one multinomial observation over the
    columns of X, and all rows share a single probability vector
    theta ~ Dirichlet(alpha). The likelihood is evaluated for
    ``iterations`` samples of theta and the per-sample log-likelihoods
    are combined with logmean().

    Parameters
    ----------
    X : 2-D array of counts, shape (n_rows, n_columns).
    alpha : Dirichlet parameter with one entry per column of X. When
        None, a vector of ones of length X.shape[1] is used.
    iterations : number of Dirichlet samples; converted with int().

    Returns
    -------
    The value of logmean() applied to the per-sample log-likelihoods
    (presumably the log of their mean in probability space -- logmean is
    defined elsewhere; confirm there).
    """
    if alpha is None:
        alpha = np.ones(X.shape[1])

    # One sampled probability vector per row: (iterations, n_columns).
    Theta = dirichlet(alpha, size=int(iterations))

    # Log multinomial coefficients, one per row of X. They do not depend
    # on theta, so they are computed once.
    log_coefficient = gammaln(X.sum(1) + 1).sum() - gammaln(X + 1).sum()

    # log(\prod(one Multinomial pdf for each row)): because theta is
    # shared by all rows, sum_ij X[i, j] * log(theta[j]) collapses to
    # sum_j colsum[j] * log(theta[j]). Theta must be indexed by column
    # (category), not by row; contracting against the column sums also
    # avoids building an (n_rows, n_columns, iterations) temporary.
    logp_ibs = log_coefficient + np.dot(np.log(Theta), X.sum(0))
    return logmean(logp_ibs)
def log_multivariate_polya_mc(X, alpha, iterations=1e5):
    """Monte Carlo estimate of the log-likelihood of X under the
    multivariate Polya distribution, also known as the Dirichlet
    compound multinomial (DCM).

    Probability vectors are drawn from Dirichlet(alpha); the multinomial
    log-likelihood of X is evaluated for each draw and the per-draw
    values are combined with logmean().

    X is presumably a 1-D vector of counts with one entry per component
    of alpha (it is broadcast against the (iterations, len(alpha))
    sample matrix) -- confirm against callers.
    """
    n_samples = int(iterations)
    samples = dirichlet(alpha, size=n_samples)

    # Log multinomial coefficient: identical for every sample.
    log_coefficient = gammaln(X.sum() + 1) - gammaln(X + 1).sum()

    # Per-sample data term: sum_j X[j] * log(theta[j]).
    log_data_term = (X * np.log(samples)).sum(1)

    return logmean(log_coefficient + log_data_term)
def estimate_radius(treeB, A, k, subset_size=1000):
    """Estimate the radius r such that Tree.query_radius(A, r) returns
    approximately k neighbors per query point.

    At most ``subset_size`` randomly chosen rows of A are queried against
    treeB for their k nearest neighbors; the distance to the farthest
    returned neighbor of each query is used to estimate the mean
    neighbor density, from which the radius is derived. The estimate is
    printed and returned.
    """
    n_points, n_dims = A.shape[0], A.shape[1]

    # Query only a random subset of the points when A is large.
    if n_points > subset_size:
        chosen = np.random.permutation(n_points)[:subset_size]
        A = A[chosen, :]

    distances, _ = tree_parallel_query(treeB, A, k)
    farthest = distances[:, -1]

    # The volume of a D-dimensional sphere of radius R is
    #   V = R^D * pi^(D/2) / gamma(D/2 + 1),
    # and the density of neighbors inside it is (number of neighbors) / V.
    # minus_log_unit_volume is -log of the unit-sphere volume; everything
    # is kept in log space to avoid numerical issues.
    half_dims = n_dims / 2.0
    minus_log_unit_volume = gammaln(half_dims + 1.0) - half_dims * np.log(np.pi)
    log_densities = (
        np.log(distances.shape[1]) + minus_log_unit_volume - n_dims * np.log(farthest)
    )
    mean_log_density = logmean(log_densities)

    # Invert the density formula to get the expected radius for k neighbors.
    log_radius = 1.0 / n_dims * (np.log(k) + minus_log_unit_volume - mean_log_density)
    radius = np.exp(log_radius)
    print("Estimated radius to get %s neighbors on average: %s" % (k, radius))
    return radius