예제 #1
0
def test_dummy_encoder_8():
    """Transform values that never appeared in the frame used for fitting.

    The encoder is fitted with ``ifq_thrshld=1`` on a single column holding
    three distinct values, then applied to three other values. The dense
    output is expected to be one column filled with ones.
    """
    train_frame = pd.DataFrame({'A': ['a', 'b', 'd']})
    test_frame = pd.DataFrame({'A': ['f', 'h', 'y']})

    encoder = DummyEncoder(ifq_thrshld=1)
    fitted = encoder.fit(train_frame)

    sparse_out = fitted.transform(test_frame)
    encoded = sparse_out.todense()

    # Three rows, one column, every entry equal to 1.
    expected = np.array([[1], [1], [1]])
    assert np.array_equal(expected, encoded)
예제 #2
0
def test_dummy_encoder_6():
    """Check ``var_names`` after fitting with ``ifq_thrshld=1`` and ``ifq_str_rpl='unk'``.

    NOTE(review): the expected names suggest that values seen only once are
    folded into a ``<column>_unk`` entry -- confirm against DummyEncoder.
    """
    columns = {
        'A': ['a', 'b', 'd'],
        'B': ['d', 'd', 'f'],
        'C': [1, 2, np.nan],
    }
    frame = pd.DataFrame(columns)

    encoder = DummyEncoder(ifq_thrshld=1, ifq_str_rpl='unk')
    fitted = encoder.fit(frame)

    expected_names = ['A_unk', 'B_d', 'B_unk', 'C']
    assert fitted.var_names == expected_names
예제 #3
0
def test_dummy_encoder_4():
    """Check ``var_names`` after fitting with ``nan_str_rpl='NaN'``.

    Column ``A`` contains a missing value, and the expected names include an
    ``A_NaN`` entry alongside one entry per observed string value.
    """
    columns = {
        'A': ['a', 'b', np.nan],
        'B': ['d', 'd', 'f'],
        'C': [1, 2, np.nan],
    }
    frame = pd.DataFrame(columns)

    encoder = DummyEncoder(nan_str_rpl='NaN')
    fitted = encoder.fit(frame)

    expected_names = ['A_NaN', 'A_a', 'A_b', 'B_d', 'B_f', 'C']
    assert fitted.var_names == expected_names
예제 #4
0
def test_dummy_encoder_3():
    """Check the shape produced by ``fit_transform`` with default settings."""
    columns = {
        'A': ['a', 'b', np.nan],
        'B': ['d', 'd', 'f'],
        'C': [1, 2, np.nan],
    }
    frame = pd.DataFrame(columns)

    encoder = DummyEncoder()
    encoded = encoder.fit_transform(frame)

    # Presumably 3 output columns for A, 2 for B and 1 for C -- TODO confirm.
    expected_rows = 3
    expected_cols = 3 + 2 + 1
    assert encoded.shape == (expected_rows, expected_cols)
예제 #5
0
def test_dummy_encoder_5():
    """Compare the dense ``fit_transform`` output with a hand-written matrix.

    The comparison goes through ``nan_equal`` because the expected matrix
    contains a NaN entry.
    """
    columns = {
        'A': ['a', 'b', np.nan],
        'B': ['d', 'd', 'f'],
        'C': [1, 2, np.nan],
    }
    frame = pd.DataFrame(columns)

    sparse_out = DummyEncoder().fit_transform(frame)
    dense_out = sparse_out.todense()
    # Re-interpret the dense result as a plain ndarray before comparing.
    encoded = dense_out.view(np.ndarray)

    # Each inner list is one expected output column; transposing turns the
    # list of columns into a (rows x columns) array.
    expected_columns = [
        [0, 0, 1],
        [1, 0, 0],
        [0, 1, 0],
        [1, 1, 0],
        [0, 0, 1],
        [1, 2, np.nan],
    ]
    expected = np.array(expected_columns).T
    assert nan_equal(expected, encoded)
예제 #6
0
def test_dummy_encoder_1():
    """Smoke test: a DummyEncoder can be constructed with no arguments."""
    assert DummyEncoder() is not None
예제 #7
0
def test_dummy_encoder_9():
    """Run the shared immutability check against a default DummyEncoder."""
    default_encoder = DummyEncoder()
    _test_immutability(encoder=default_encoder)