Python _compute_miの例

プログラミング言語: Python

名前空間/パッケージ名: sklearn.feature_selection._mutual_info

メソッド/関数: _compute_mi

hotexamples.comのコード掲載数: 6

Python _compute_mi - 6件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのsklearn.feature_selection._mutual_info._compute_miの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: test_mutual_info.py プロジェクト: danielmoreira12/BAProject

def test_compute_mi_cd():
    # To test define a joint distribution as follows:
    # p(x, y) = p(x) p(y | x)
    # X ~ Bernoulli(p)
    # (Y | x = 0) ~ Uniform(-1, 1)
    # (Y | x = 1) ~ Uniform(0, 2)

    # Use the following formula for mutual information:
    # I(X; Y) = H(Y) - H(Y | X)
    # Two entropies can be computed by hand:
    # H(Y) = -(1-p)/2 * ln((1-p)/2) - p/2*log(p/2) - 1/2*log(1/2)
    # H(Y | X) = ln(2)

    # Now we need to implement sampling from out distribution, which is
    # done easily using conditional distribution logic.

    n_samples = 1000
    rng = check_random_state(0)

    for p in [0.3, 0.5, 0.7]:
        x = rng.uniform(size=n_samples) > p

        y = np.empty(n_samples)
        mask = x == 0
        y[mask] = rng.uniform(-1, 1, size=np.sum(mask))
        y[~mask] = rng.uniform(0, 2, size=np.sum(~mask))

        I_theory = -0.5 * ((1 - p) * np.log(0.5 * (1 - p)) +
                           p * np.log(0.5 * p) + np.log(0.5)) - np.log(2)

        # Assert the same tolerance.
        for n_neighbors in [3, 5, 7]:
            I_computed = _compute_mi(x, y, True, False, n_neighbors)
            assert_almost_equal(I_computed, I_theory, 1)

コード例 #2

ファイルを表示

ファイル: test_mutual_info.py プロジェクト: danielmoreira12/BAProject

def test_compute_mi_cc():
    # For two continuous variables a good approach is to test on bivariate
    # normal distribution, where mutual information is known.

    # Mean of the distribution, irrelevant for mutual information.
    mean = np.zeros(2)

    # Setup covariance matrix with correlation coeff. equal 0.5.
    sigma_1 = 1
    sigma_2 = 10
    corr = 0.5
    cov = np.array([[sigma_1**2, corr * sigma_1 * sigma_2],
                    [corr * sigma_1 * sigma_2, sigma_2**2]])

    # True theoretical mutual information.
    I_theory = (np.log(sigma_1) + np.log(sigma_2) -
                0.5 * np.log(np.linalg.det(cov)))

    rng = check_random_state(0)
    Z = rng.multivariate_normal(mean, cov, size=1000)

    x, y = Z[:, 0], Z[:, 1]

    # Theory and computed values won't be very close, assert that the
    # first figures after decimal point match.
    for n_neighbors in [3, 5, 7]:
        I_computed = _compute_mi(x, y, False, False, n_neighbors)
        assert_almost_equal(I_computed, I_theory, 1)

コード例 #3

ファイルを表示

def test_compute_mi_cc(global_dtype):
    # For two continuous variables a good approach is to test on bivariate
    # normal distribution, where mutual information is known.

    # Mean of the distribution, irrelevant for mutual information.
    mean = np.zeros(2)

    # Setup covariance matrix with correlation coeff. equal 0.5.
    sigma_1 = 1
    sigma_2 = 10
    corr = 0.5
    cov = np.array([
        [sigma_1**2, corr * sigma_1 * sigma_2],
        [corr * sigma_1 * sigma_2, sigma_2**2],
    ])

    # True theoretical mutual information.
    I_theory = np.log(sigma_1) + np.log(sigma_2) - 0.5 * np.log(
        np.linalg.det(cov))

    rng = check_random_state(0)
    Z = rng.multivariate_normal(mean, cov, size=1000).astype(global_dtype,
                                                             copy=False)

    x, y = Z[:, 0], Z[:, 1]

    # Theory and computed values won't be very close
    # We here check with a large relative tolerance
    for n_neighbors in [3, 5, 7]:
        I_computed = _compute_mi(x, y, False, False, n_neighbors)
        assert_allclose(I_computed, I_theory, rtol=1e-1)

コード例 #4

ファイルを表示

ファイル: test_mutual_info.py プロジェクト: danielmoreira12/BAProject

def test_compute_mi_cd_unique_label():
    # Test that adding unique label doesn't change MI.
    n_samples = 100
    x = np.random.uniform(size=n_samples) > 0.5

    y = np.empty(n_samples)
    mask = x == 0
    y[mask] = np.random.uniform(-1, 1, size=np.sum(mask))
    y[~mask] = np.random.uniform(0, 2, size=np.sum(~mask))

    mi_1 = _compute_mi(x, y, True, False)

    x = np.hstack((x, 2))
    y = np.hstack((y, 10))
    mi_2 = _compute_mi(x, y, True, False)

    assert mi_1 == mi_2

コード例 #5

ファイルを表示

def test_compute_mi_cd_unique_label(global_dtype):
    # Test that adding unique label doesn't change MI.
    n_samples = 100
    x = np.random.uniform(size=n_samples) > 0.5

    y = np.empty(n_samples, global_dtype)
    mask = x == 0
    y[mask] = np.random.uniform(-1, 1, size=np.sum(mask))
    y[~mask] = np.random.uniform(0, 2, size=np.sum(~mask))

    mi_1 = _compute_mi(x, y, x_discrete=True, y_discrete=False)

    x = np.hstack((x, 2))
    y = np.hstack((y, 10))
    mi_2 = _compute_mi(x, y, x_discrete=True, y_discrete=False)

    assert_allclose(mi_1, mi_2)

コード例 #6

ファイルを表示

def test_compute_mi_dd():
    # In discrete case computations are straightforward and can be done
    # by hand on given vectors.
    x = np.array([0, 1, 1, 0, 0])
    y = np.array([1, 0, 0, 0, 1])

    H_x = H_y = -(3 / 5) * np.log(3 / 5) - (2 / 5) * np.log(2 / 5)
    H_xy = -1 / 5 * np.log(1 / 5) - 2 / 5 * np.log(2 / 5) - 2 / 5 * np.log(2 / 5)
    I_xy = H_x + H_y - H_xy

    assert_allclose(_compute_mi(x, y, x_discrete=True, y_discrete=True), I_xy)