コード例 #1
0
def test_arbitrary_encoding_automatically_find_variables(df_enc):
    """Arbitrary ordinal encoding with ``variables=None``.

    Fits and transforms ``df_enc`` in one step, then verifies the init
    parameters, the fitted attributes (``variables_``, ``encoder_dict_``,
    ``n_features_in_``) and the transformed frame.
    """
    ordinal_enc = OrdinalEncoder(encoding_method="arbitrary", variables=None)
    transformed = ordinal_enc.fit_transform(df_enc)

    # Expected frame: the input with var_A / var_B replaced by integer codes,
    # written as run lengths (20 rows each).
    expected = df_enc.assign(
        var_A=[0] * 6 + [1] * 10 + [2] * 4,
        var_B=[0] * 10 + [1] * 6 + [2] * 4,
    )
    # Both variables are expected to learn the same category -> code mapping.
    category_codes = {"A": 0, "B": 1, "C": 2}

    # init params
    assert ordinal_enc.encoding_method == "arbitrary"
    assert ordinal_enc.variables is None
    # attributes learned during fit
    assert ordinal_enc.variables_ == ["var_A", "var_B"]
    assert ordinal_enc.encoder_dict_ == {
        "var_A": category_codes,
        "var_B": category_codes,
    }
    assert ordinal_enc.n_features_in_ == 3
    # transform output
    pd.testing.assert_frame_equal(transformed, expected)
コード例 #2
0
def test_arbitrary_encoding_automatically_find_variables_ignore_format(
        df_enc_numeric):
    """Arbitrary ordinal encoding with ``variables=None`` and ``ignore_format=True``.

    Fits and transforms only the ``var_A`` / ``var_B`` columns of
    ``df_enc_numeric``, then verifies the init parameters, the fitted
    attributes and the transformed frame.
    """
    columns = ["var_A", "var_B"]

    ordinal_enc = OrdinalEncoder(
        encoding_method="arbitrary",
        variables=None,
        ignore_format=True,
    )
    transformed = ordinal_enc.fit_transform(df_enc_numeric[columns])

    # Expected frame: the two selected columns replaced by integer codes,
    # written as run lengths (20 rows each).
    expected = df_enc_numeric[columns].assign(
        var_A=[0] * 6 + [1] * 10 + [2] * 4,
        var_B=[0] * 10 + [1] * 6 + [2] * 4,
    )
    # Both variables are expected to learn the same value -> code mapping.
    value_codes = {1: 0, 2: 1, 3: 2}

    # init params
    assert ordinal_enc.encoding_method == "arbitrary"
    assert ordinal_enc.variables is None
    # attributes learned during fit
    assert ordinal_enc.variables_ == ["var_A", "var_B"]
    assert ordinal_enc.encoder_dict_ == {
        "var_A": value_codes,
        "var_B": value_codes,
    }
    assert ordinal_enc.n_features_in_ == 2
    # transform output
    pd.testing.assert_frame_equal(transformed, expected)