Example #1
def js_divergence(P, Q):
    """Jensen-Shannon divergence between `P` and `Q`.

    Parameters
    ----------

    P, Q (np.ndarray)
        Two discrete distributions represented as 1D arrays. They are
    assumed to have the same support.

    Returns
    -------

    float
        The Jensen-Shannon divergence between `P` and `Q`.

    """
    M = 0.5 * (P + Q)
    jsd = 0.5 * (sp_entropy(P, M, base=2) + sp_entropy(Q, M, base=2))

    # If the input distributions are identical, floating-point error in the
    # construction of the mixture distribution M can result in negative values
    # that are very close to zero. If one wants to compute the root-JSD metric,
    # these negative values lead to undesirable NaNs.
    if np.isclose(jsd, 0.0):
        return 0.0
    else:
        return jsd
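A minimal usage sketch for the function above, assuming numpy is imported as np and scipy.stats.entropy as sp_entropy; the distributions P and Q below are made-up values:

import numpy as np
from scipy.stats import entropy as sp_entropy

# Two discrete distributions over the same three-state support (hypothetical values)
P = np.array([0.5, 0.3, 0.2])
Q = np.array([0.4, 0.4, 0.2])

print(js_divergence(P, Q))  # small positive value, bounded by 1 in base 2
print(js_divergence(P, P))  # exactly 0, thanks to the np.isclose guard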
Example #2
File: entropy.py  Project: leotrs/netrd
def joint_entropy(data):
    r"""Joint entropy of all variables in the data.

    Parameters
    ----------
    data (np.ndarray)
        Array of data with variables as columns and observations as rows.

    Returns
    -------
    float
        Joint entropy of the variables of interest.

    Notes
    -----
    1. :math:`H(\{X_i\}) = - \sum p(\{X_i\}) \log_2(p(\{X_i\}))`
    2. The variables in the data must be categorical.

    """
    # The entropy is computed by summing the contributions of states with
    # non-zero empirical probability in the data
    count = defaultdict(int)
    for state in data:
        key = tuple(state)
        count[key] += 1

    return sp_entropy(list(count.values()), base=2)
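A short usage sketch for joint_entropy, assuming numpy is imported as np and the defaultdict/sp_entropy imports from the source file are in scope; the categorical data below is made up:

import numpy as np

# Rows are observations, columns are categorical variables
data = np.array([[0, 1],
                 [0, 1],
                 [1, 0],
                 [1, 1]])

# Empirical joint distribution is {(0, 1): 1/2, (1, 0): 1/4, (1, 1): 1/4},
# so the joint entropy is 1.5 bits
print(joint_entropy(data))  # 1.5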
Example #3
def error(alpha, n):
    """Return the actual error and the estimated uncertainty (normalized)"""
    k = len(alpha)
    pvals = dirichlet(alpha)
    counts = multinomial(n, pvals)
    h0 = sp_entropy(pvals)
    h, std = ndd.entropy(counts, k=k, return_std=True)
    return (h - h0) / h0, std / h0
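A hedged sketch of calling error(), assuming dirichlet and multinomial refer to numpy.random.dirichlet and numpy.random.multinomial and that the ndd package used in the body is installed; the sizes below are placeholders:

import numpy as np
from numpy.random import dirichlet, multinomial
import ndd

# Symmetric Dirichlet prior over 100 categories, 1000 samples (hypothetical sizes)
alpha = np.ones(100)
relative_error, relative_std = error(alpha, n=1000)
print(relative_error, relative_std)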
Example #4
File: entropy.py  Project: leotrs/netrd
def js_divergence(P, Q):
    """Jensen-Shannon divergence between `P` and `Q`.

    Parameters
    ----------

    P, Q (np.ndarray)
        Two discrete distributions represented as 1D arrays. They are
        assumed to have the same support.

    Returns
    -------

    float
        The Jensen-Shannon divergence between `P` and `Q`.

    """
    M = 0.5 * (P + Q)
    return 0.5 * (sp_entropy(P, M, base=2) + sp_entropy(Q, M, base=2))
Example #5
def test_entropy_execution(setup):
    rs = np.random.RandomState(0)
    a = rs.rand(10)

    t1 = tensor(a, chunk_size=4)
    r = entropy(t1)

    result = r.execute().fetch()
    expected = sp_entropy(a)
    np.testing.assert_array_almost_equal(result, expected)

    b = rs.rand(10)
    base = 3.1

    t2 = tensor(b, chunk_size=4)
    r = entropy(t1, t2, base)

    result = r.execute().fetch()
    expected = sp_entropy(a, b, base)
    np.testing.assert_array_almost_equal(result, expected)

    b = rs.rand(10)
    base = 3.1

    t2 = tensor(b, chunk_size=4)
    r = entropy(t1, t2, base)

    result = r.execute().fetch()
    expected = sp_entropy(a, b, base)
    np.testing.assert_array_almost_equal(result, expected)

    r = entropy(t1, t2, t1.sum())

    result = r.execute().fetch()
    expected = sp_entropy(a, b, a.sum())
    np.testing.assert_array_almost_equal(result, expected)

    with pytest.raises(ValueError):
        entropy(t1, t2[:7])
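For reference, when a second argument is given, scipy.stats.entropy computes the relative entropy (Kullback-Leibler divergence) rather than the plain entropy, which is what the two-tensor cases above exercise. A small sketch, assuming numpy is imported as np:

import numpy as np
from scipy.stats import entropy as sp_entropy

a = np.array([0.5, 0.5])
b = np.array([0.9, 0.1])

print(sp_entropy(a))             # Shannon entropy of a, in nats
print(sp_entropy(a, b))          # KL divergence D(a || b), in nats
print(sp_entropy(a, b, base=2))  # the same divergence, in bits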
Example #6
    def testEntropyExecution(self):
        rs = np.random.RandomState(0)
        a = rs.rand(10)

        t1 = tensor(a, chunk_size=4)
        r = entropy(t1)

        result = self.executor.execute_tensor(r, concat=True)[0]
        expected = sp_entropy(a)
        np.testing.assert_array_almost_equal(result, expected)

        b = rs.rand(10)
        base = 3.1

        t2 = tensor(b, chunk_size=4)
        r = entropy(t1, t2, base)

        result = self.executor.execute_tensor(r, concat=True)[0]
        expected = sp_entropy(a, b, base)
        np.testing.assert_array_almost_equal(result, expected)

        b = rs.rand(10)
        base = 3.1

        t2 = tensor(b, chunk_size=4)
        r = entropy(t1, t2, base)

        result = self.executor.execute_tensor(r, concat=True)[0]
        expected = sp_entropy(a, b, base)
        np.testing.assert_array_almost_equal(result, expected)

        r = entropy(t1, t2, t1.sum())

        result = self.executor.execute_tensor(r, concat=True)[0]
        expected = sp_entropy(a, b, a.sum())
        np.testing.assert_array_almost_equal(result, expected)

        with self.assertRaises(ValueError):
            entropy(t1, t2[:7])
Example #7
def test_entropy_random(n_samples, base, use_handle):
    handle, stream = get_handle(use_handle)

    clustering, _ = \
        generate_random_labels(lambda rng: rng.randint(0, 1000, n_samples))

    # generate unnormalized probabilities (counts) from the clustering
    pk = np.bincount(clustering)

    # scipy's entropy works on (unnormalized) probabilities
    sp_S = sp_entropy(pk, base=base)
    # this entropy implementation takes the raw cluster labels instead
    S = entropy(np.array(clustering, dtype=np.int32), base, handle=handle)

    assert_almost_equal(S, sp_S, decimal=2)
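One detail worth noting: scipy.stats.entropy normalizes its pk argument internally, so passing the raw bincounts above is equivalent to passing explicit probabilities. A small sketch, assuming numpy is imported as np:

import numpy as np
from scipy.stats import entropy as sp_entropy

labels = np.array([0, 0, 1, 2, 2, 2])
pk = np.bincount(labels)      # unnormalized counts: [2, 1, 3]
probs = pk / pk.sum()         # explicit probabilities

assert np.isclose(sp_entropy(pk), sp_entropy(probs))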
Example #8
def test_entropy_random(n_samples, base, use_handle):
    if has_scipy():
        from scipy.stats import entropy as sp_entropy
    else:
        pytest.skip('Skipping test_entropy_random because Scipy is missing')

    handle, stream = get_handle(use_handle)

    clustering, _, _, _ = \
        generate_random_labels(lambda rng: rng.randint(0, 1000, n_samples))

    # generate unnormalized probabilities (counts) from the clustering
    pk = np.bincount(clustering)

    # scipy's entropy works on (unnormalized) probabilities
    sp_S = sp_entropy(pk, base=base)
    # this entropy implementation takes the raw cluster labels instead
    S = entropy(np.array(clustering, dtype=np.int32), base, handle=handle)

    assert_almost_equal(S, sp_S, decimal=2)
Example #9
def D_KL(p, q, base=None):
    """Compute  Kullback-Leibler divergence between PDs p and q."""

    D = sp_entropy(p, q, base=base)
    return D
Example #10
def entropy(p, base=None):
    """Compute entropy of probability distribution p."""

    H = sp_entropy(p, base=base)
    return H
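A minimal usage sketch for the two thin wrappers above, assuming scipy.stats.entropy is imported as sp_entropy; the distributions below are hypothetical:

import numpy as np

p = np.array([0.5, 0.5])
q = np.array([0.9, 0.1])

print(entropy(p, base=2))   # 1.0 bit for a uniform two-state distribution
print(D_KL(p, q, base=2))   # positive; it is 0 only when p and q are identical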
Example #11
File: timings.py  Project: simomarsili/ndd
def scipy_entropy(counts, k):  # pylint: disable=unused-argument
    """scipy.stats.entropy() execution time"""
    start = time.time()
    _ = sp_entropy(counts)
    end = time.time()
    return end - start, 0
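A small sketch of how this timing helper might be invoked, assuming numpy is imported as np and that the time and sp_entropy imports from the source file are in scope; the counts and k below are placeholders:

import numpy as np

counts = np.random.randint(0, 50, size=10000)   # hypothetical count vector
elapsed, _ = scipy_entropy(counts, k=counts.size)
print('scipy.stats.entropy took %.4f s' % elapsed)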