Python mutual_info_classif 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: sklearn.feature_selection.mutual_info_

메소드/함수: mutual_info_classif

hotexamples.com에서의 예제들: 7

Python mutual_info_classif - 7개의 예제가 발견되었습니다. 아래는 오픈소스 프로젝트에서 추출한 sklearn.feature_selection.mutual_info_.mutual_info_classif의 Python 예제 중 가장 높은 평가를 받은 실제 사용 사례입니다. 예제를 평가하시면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

파일: test_mutual_info.py 프로젝트: tcwwh/scikit-learn

def test_mutual_info_classif_mixed():
    """Check MI ranking and n_neighbors sensitivity on mixed feature types.

    The target is discrete. Columns 0 and 1 are continuous, column 2 is
    binarised and declared discrete.
    """
    rng = check_random_state(0)
    features = rng.rand(1000, 3)
    # The target must be derived before column 2 is binarised below.
    target = ((0.5 * features[:, 0] + features[:, 2]) > 0.5).astype(int)
    features[:, 1] += features[:, 0]
    features[:, 2] = features[:, 2] > 0.5

    # Arguments shared by the baseline call and every call in the sweep.
    shared = dict(discrete_features=[2], random_state=0)

    baseline = mutual_info_classif(features, target, n_neighbors=3, **shared)
    # Expected order from most to least informative: columns 2, 0, 1.
    assert_array_equal(np.argsort(-baseline), [2, 0, 1])

    for k in (5, 7, 9):
        swept = mutual_info_classif(features, target, n_neighbors=k, **shared)
        # Continuous columns are expected to score higher with more
        # neighbours than they did at the baseline of 3.
        assert swept[0] > baseline[0]
        assert swept[1] > baseline[1]
        # n_neighbors must not influence the discrete column: its score
        # has to match the baseline exactly.
        assert swept[2] == baseline[2]

예제 #2

파일 보기

파일: test_mutual_info.py 프로젝트: tcwwh/scikit-learn

def test_mutual_info_classif_discrete():
    """Check the MI ranking of three all-discrete features on a tiny dataset."""
    rows = [
        [0, 0, 0],
        [1, 1, 0],
        [2, 0, 1],
        [2, 0, 1],
        [2, 0, 1],
    ]
    features = np.array(rows)
    labels = np.array([0, 1, 2, 2, 1])

    scores = mutual_info_classif(features, labels, discrete_features=True)

    # Column 0 is expected to be the most informative and column 1 the
    # least, so sorting by descending score must give columns 0, 2, 1.
    expected_order = np.array([0, 2, 1])
    assert_array_equal(np.argsort(-scores), expected_order)

예제 #3

파일 보기

파일: test_mutual_info.py 프로젝트: 0664j35t3r/scikit-learn

def test_mutual_info_classif_mixed():
    """Check the MI ranking when continuous and discrete features are mixed.

    The target is discrete. Columns 0 and 1 are continuous, column 2 is
    binarised and declared discrete via ``discrete_features=[2]``.
    """
    # Draw from a private RandomState instead of calling np.random.seed(0):
    # the sampled values are the same, but the process-wide generator is
    # left untouched, so this test cannot affect the randomness of others.
    rng = np.random.RandomState(0)
    X = rng.rand(1000, 3)
    X[:, 1] += X[:, 0]
    # The target must be computed before column 2 is binarised below.
    y = ((0.5 * X[:, 0] + X[:, 2]) > 0.5).astype(int)
    X[:, 2] = X[:, 2] > 0.5

    mi = mutual_info_classif(X, y, discrete_features=[2], random_state=0)
    # Expected order from most to least informative: columns 2, 0, 1.
    assert_array_equal(np.argsort(-mi), [2, 0, 1])

예제 #4

파일 보기

파일: test_mutual_info.py 프로젝트: QPanProjects/Surrogate-Model

def test_mutual_info_classif_mixed():
    """Check the MI ranking when continuous and discrete features are mixed.

    The target is discrete. Columns 0 and 1 are continuous, column 2 is
    binarised and declared discrete via ``discrete_features=[2]``.
    """
    # A local RandomState replaces np.random.seed(0) so the global NumPy
    # generator is not reseeded as a side effect of running this test.
    # The values drawn are the same as with the global seed.
    rng = np.random.RandomState(0)
    X = rng.rand(1000, 3)
    X[:, 1] += X[:, 0]
    # The target must be computed before column 2 is binarised below.
    y = ((0.5 * X[:, 0] + X[:, 2]) > 0.5).astype(int)
    X[:, 2] = X[:, 2] > 0.5

    mi = mutual_info_classif(X, y, discrete_features=[2], random_state=0)
    # Expected order from most to least informative: columns 2, 0, 1.
    assert_array_equal(np.argsort(-mi), [2, 0, 1])

예제 #5

파일 보기

파일: test_mutual_info.py 프로젝트: aniryou/scikit-learn

def test_mutual_info_classif_discrete():
    """Check the MI ranking of three all-discrete features on a tiny dataset."""
    samples = np.array([[0, 0, 0], [1, 1, 0], [2, 0, 1], [2, 0, 1], [2, 0, 1]])
    classes = np.array([0, 1, 2, 2, 1])

    info = mutual_info_classif(samples, classes, discrete_features=True)
    ranking = np.argsort(-info)

    # Column 0 is expected to rank first and column 1 last.
    assert_array_equal(ranking, np.array([0, 2, 1]))

예제 #6

파일 보기

파일: test_mutual_info.py 프로젝트: aniryou/scikit-learn

def test_mutual_info_classif_mixed():
    """Check MI ranking and n_neighbors sensitivity on mixed feature types.

    The target is discrete. Columns 0 and 1 are continuous, column 2 is
    binarised and declared discrete.
    """
    rng = check_random_state(0)
    X = rng.rand(1000, 3)
    X[:, 1] += X[:, 0]
    # The target must be computed before column 2 is binarised below.
    y = ((0.5 * X[:, 0] + X[:, 2]) > 0.5).astype(int)
    X[:, 2] = X[:, 2] > 0.5

    def estimate(k):
        # Every call differs only in the neighbour count.
        return mutual_info_classif(
            X, y, discrete_features=[2], n_neighbors=k, random_state=0
        )

    reference = estimate(3)
    # Expected order from most to least informative: columns 2, 0, 1.
    assert_array_equal(np.argsort(-reference), [2, 0, 1])

    for k in (5, 7, 9):
        current = estimate(k)
        # Both continuous columns are expected to score higher with more
        # neighbours than they did at the reference of 3.
        assert_greater(current[0], reference[0])
        assert_greater(current[1], reference[1])
        # The discrete column must be unaffected by n_neighbors, so its
        # score has to equal the reference value.
        assert_equal(current[2], reference[2])

예제 #7

파일 보기

def get_mutual_information(inputs, targets, token2idx, stop_words = None, mask_token = None):
    """Score each vocabulary token by its mutual information with ``targets``.

    Parameters
    ----------
    inputs
        Encoded samples; must expose ``.shape`` and be accepted by
        ``convert_X_to_ijv_format`` (defined elsewhere in the project).
    targets
        Class labels, passed unchanged to ``mutual_info_classif``.
    token2idx
        Mapping from token to column index. Its length sets the number of
        columns of the count matrix.
    stop_words
        Optional iterable of tokens whose score is forced to 0. Tokens
        missing from ``token2idx`` are ignored. ``None`` skips this step.
    mask_token
        Optional token whose score is forced to 0. ``None`` skips this step.

    Returns
    -------
    The array returned by ``mutual_info_classif`` with the stop-word and
    mask-token entries zeroed, then shifted by ``1e-9``.

    Raises
    ------
    KeyError
        If ``mask_token`` is given but is not a key of ``token2idx``.
    """
    # Assemble a sparse (n_samples, vocab_size) matrix from the
    # (values, row indices, column indices) triplets.
    data, row, col = convert_X_to_ijv_format(inputs)
    counts = csc_matrix((data, (row, col)), shape=(inputs.shape[0], len(token2idx)))

    # The matrix is scored as built; an earlier TF-IDF reweighting step
    # that used to sit here was disabled and has been removed.
    mi = mutual_info_classif(counts, targets)

    # Both arguments default to None; guard them so the defaults work
    # instead of raising KeyError / TypeError.
    if mask_token is not None:
        mi[token2idx[mask_token]] = 0.0
    if stop_words is not None:
        for stop_word in stop_words:
            if stop_word in token2idx:
                mi[token2idx[stop_word]] = 0.0

    print('Maximum mutual information:', np.max(mi))
    print('Minimum mutual information:', np.min(mi))
    # Shift every score by a tiny constant, so zeroed entries end up at 1e-9
    # (presumably to keep downstream log/division well defined - confirm
    # against callers).
    mi += 1e-9
    return mi