def test_gap_encoder(hashing, init, analyzer, add_words, n_samples=70):
    """Fit a GapEncoder and check output shape, topic norms and determinism."""
    corpus = fetch_20newsgroups(subset='train')['data']
    X = corpus[:n_samples]
    n_components = 10
    # Check the shape of the transformed output.
    enc = GapEncoder(
        n_components=n_components, hashing=hashing, init=init,
        analyzer=analyzer, add_words=add_words, random_state=42,
        rescale_W=True)
    enc.fit(X)
    out = enc.transform(X)
    assert out.shape == (n_samples, n_components), str(out.shape)
    assert len(set(out[0])) == n_components
    # With rescale_W=True each topic vector of W_ has unit L1 norm.
    l1_norms = np.abs(enc.W_).sum(axis=1)
    np.testing.assert_array_almost_equal(l1_norms, np.ones(n_components))
    # Refitting with the same seed must reproduce the same output.
    enc = GapEncoder(
        n_components=n_components, hashing=hashing, init=init,
        analyzer=analyzer, add_words=add_words, random_state=42)
    enc.fit(X)
    out_again = enc.transform(X)
    np.testing.assert_array_equal(out, out_again)
def test_analyzer(init1, analyzer1, analyzer2):
    """Check that two different analyzers produce different outputs.

    No error is raised when the outputs differ.
    """
    add_words = False
    n_samples = 70
    texts = fetch_20newsgroups(subset='train')['data'][:n_samples]
    X = np.array([texts, texts]).T
    n_components = 10
    # Output with the first analyzer.
    enc = GapEncoder(n_components=n_components, init='k-means++',
                     analyzer=analyzer1, add_words=add_words,
                     random_state=42, rescale_W=True)
    enc.fit(X)
    out_first = enc.transform(X)
    # Output with the second analyzer.
    enc = GapEncoder(n_components=n_components, init='k-means++',
                     analyzer=analyzer2, add_words=add_words,
                     random_state=42)
    enc.fit(X)
    out_second = enc.transform(X)
    # The two analyzers must not produce identical outputs.
    np.testing.assert_raises(
        AssertionError, np.testing.assert_array_equal,
        out_first, out_second)
def test_overflow_error():
    """GapEncoder.fit must not overflow or divide by zero on numeric strings.

    Numpy's 'over' and 'divide' warnings are promoted to errors so that any
    such issue makes the fit raise and fail the test.
    """
    # np.errstate restores the global floating-point error settings on exit;
    # the original bare np.seterr call leaked the 'raise' settings into every
    # test that ran afterwards.
    with np.errstate(over='raise', divide='raise'):
        rng = np.random.RandomState(0)
        # Integer bounds: passing floats (1e5, 1e6) to randint is deprecated.
        X = rng.randint(100000, 1000000, size=(8000, 1)).astype(str)
        enc = GapEncoder(n_components=2, batch_size=1, min_iter=1,
                         max_iter=1, random_state=0)
        enc.fit(X)
def test_get_feature_names(n_samples=70):
    """The number of feature labels must equal the number of components."""
    data = fetch_20newsgroups(subset='train')['data'][:n_samples]
    enc = GapEncoder()
    enc.fit(data)
    labels = enc.get_feature_names()
    # One label per fitted topic.
    assert len(labels) == enc.n_components
def test_score(n_samples=70):
    """Two identical columns must score exactly twice a single column."""
    texts = fetch_20newsgroups(subset='train')['data'][:n_samples]
    one_col = np.array(texts)[:, None]
    two_cols = np.hstack([one_col, one_col])
    enc = GapEncoder(random_state=42)
    enc.fit(one_col)
    score_one = enc.score(one_col)
    enc.fit(two_cols)
    score_two = enc.score(two_cols)
    # The score of duplicated columns is the sum of the per-column scores.
    assert score_one * 2 == score_two
def test_get_feature_names_out(n_samples=70):
    """Check label counts and the col_names options of get_feature_names_out."""
    texts = fetch_20newsgroups(subset='train')['data'][:n_samples]
    X = np.array([texts, texts]).T
    enc = GapEncoder(random_state=42)
    enc.fit(X)
    for labels in (enc.get_feature_names(), enc.get_feature_names_out()):
        # One set of topic labels per input column.
        assert len(labels) == enc.n_components * X.shape[1]
        # col_names='auto' prefixes labels with generated column names.
        auto_labels = enc.get_feature_names_out(col_names='auto')
        assert auto_labels[0] == 'col0: ' + labels[0]
        # Explicit column names are used as prefixes.
        named_labels = enc.get_feature_names_out(col_names=['abc', 'def'])
        assert named_labels[0] == 'abc: ' + labels[0]