Example #1
import numpy as np
from scipy.special import logsumexp as sp_logsumexp

# log_multivariate_normal_density is assumed to be defined elsewhere in the
# same module (the helper was removed from scikit-learn's public API).

def log_likelihood(X, weights, means, covars, scale):
    """Modified sklearn GMM function predicting distribution membership.

    Returns the mixture LL for points X. Used by :func:`~assign_samples` and
    :func:`~PopPUNK.plot.plot_contours`

    Args:
        X (numpy.array)
            n x 2 array of core and accessory distances for n samples
        weights (numpy.array)
            Component weights from :func:`~fit2dMultiGaussian`
        means (numpy.array)
            Component means from :func:`~fit2dMultiGaussian`
        covars (numpy.array)
            Component covariances from :func:`~fit2dMultiGaussian`
        scale (numpy.array)
            Scaling of core and accessory distances from :func:`~fit2dMultiGaussian`
    Returns:
        logprob (numpy.array)
            The log of the probabilities under the mixture model
        lpr (numpy.array)
            The components of the log probability from each mixture component
    """

    lpr = (log_multivariate_normal_density(X / scale, means, covars) +
           np.log(weights))
    logprob = sp_logsumexp(lpr, axis=1)

    return (logprob, lpr)
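For comparison, here is a minimal standalone sketch of the same mixture
log-likelihood computed with scipy.stats.multivariate_normal in place of the
log_multivariate_normal_density helper; the weights, means, covariances and
points below are all hypothetical toy values.

import numpy as np
from scipy.stats import multivariate_normal
from scipy.special import logsumexp

# Toy two-component mixture over 2-D (core, accessory) distances.
weights = np.array([0.7, 0.3])
means = np.array([[0.1, 0.2], [0.5, 0.6]])
covars = np.array([np.eye(2) * 0.01, np.eye(2) * 0.02])
X = np.array([[0.12, 0.18], [0.55, 0.61]])

# Weighted per-component log densities, shape (n, K) ...
lpr = np.stack([multivariate_normal.logpdf(X, means[k], covars[k])
                + np.log(weights[k]) for k in range(len(weights))], axis=1)
# ... summed over components in log space to give the mixture LL per point.
logprob = logsumexp(lpr, axis=1)
print(logprob)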
Example #2
import numpy as np
from scipy.special import logsumexp as sp_logsumexp

def logsumexp(a, axis=None):
    """
    Compute the log of the sum of exponentials, log(e^{a_1} + ... + e^{a_n}), of a.

    Avoids numerical overflow.

    Parameters
    ----------
    a : array_like
        The vector to exponentiate and sum
    axis : int, optional
        The axis along which to apply the operation. Default is None.

    Returns
    -------
    log(sum(exp(a)))

    Notes
    -----
    This function was taken from the mailing list
    http://mail.scipy.org/pipermail/scipy-user/2009-October/022931.html

    This should be superseded by the ufunc when it is finished.
    """
    if axis is None:
        # Defer to the scipy.special version.
        return sp_logsumexp(a)
    a = np.asarray(a)
    shp = list(a.shape)
    shp[axis] = 1
    a_max = a.max(axis=axis)
    s = np.log(np.exp(a - a_max.reshape(shp)).sum(axis=axis))
    lse = a_max + s
    return lse
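A quick usage sketch (assuming the function above is in scope) showing the
overflow the max-shift avoids; the input values are arbitrary:

import numpy as np

a = np.array([[1000.0, 1000.5],
              [2.0, 3.0]])

# The naive form overflows on the first row and returns inf ...
naive = np.log(np.exp(a).sum(axis=1))
# ... while the shifted form stays finite: roughly [1000.97, 3.31].
stable = logsumexp(a, axis=1)
print(naive, stable)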
Example #3
import numpy as np
from scipy.special import logsumexp as sp_logsumexp

def PDI(trace, model):
    """Posterior dispersion indices (PDI, log-PDI, WAPDI) for each data point."""
    # _log_post_trace is assumed to be available in the surrounding code base
    # (e.g. an older PyMC3 stats module); it returns the pointwise
    # log-likelihood of the data under each posterior sample.
    log_px = _log_post_trace(trace, model)  # shape (nsamples, N_datapoints)

    # Log posterior predictive density of data point n:
    #   lppd_n = log E_{q(theta)}[ p(x_n | theta) ],
    # estimated as a log-mean-exp over posterior samples via b = 1/nsamples.
    lppd_n = sp_logsumexp(log_px, axis=0, b=1.0 / log_px.shape[0])

    # Moments of p(x_n | theta) across posterior samples, on the natural
    # scale and on the log scale.
    mu_n = np.exp(lppd_n)
    var_log_n = np.var(log_px, axis=0)
    mu_log_n = np.mean(log_px, axis=0)
    var_n = np.var(np.exp(log_px), axis=0)

    # Dispersion indices: variance-to-mean ratio on the natural scale (PDI),
    # the same ratio on the log scale (log-PDI), and WAPDI, which divides the
    # log-scale variance by the log predictive density.
    pdi = np.divide(var_n, mu_n)
    pdi_log = np.divide(var_log_n, mu_log_n)
    wapdi = np.divide(var_log_n, np.log(mu_n))

    return pdi, pdi_log, wapdi
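The b keyword is the load-bearing trick here: passing b = 1/nsamples turns
scipy's logsumexp into a numerically stable log-mean-exp. A small sketch with
a synthetic log-density matrix standing in for the _log_post_trace output:

import numpy as np
from scipy.special import logsumexp as sp_logsumexp

# Synthetic (nsamples, N) pointwise log-likelihoods (hypothetical values).
rng = np.random.default_rng(0)
log_px = rng.normal(loc=-2.0, scale=0.5, size=(1000, 5))

# lppd_n = log( (1/S) * sum_s exp(log_px[s, n]) )
lppd_n = sp_logsumexp(log_px, axis=0, b=1.0 / log_px.shape[0])

# Matches the direct computation here because these toy values are small
# enough not to overflow.
assert np.allclose(lppd_n, np.log(np.exp(log_px).mean(axis=0)))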