Esempio n. 1
0
def test_OutSamplerTransformer_regressor(multi_output):
    """Check OutSamplerTransformer wrapped around a RandomForestRegressor.

    After ``fit``, ``transform(X)`` must agree (within 1e-10) with
    ``model.model.predict(X)``, come back as a 2-D array, and the feature
    names must follow the ``[output<d>__]RandomForestRegressor__target``
    pattern.

    Parameters
    ----------
    multi_output : bool
        When truthy the target has two columns, otherwise it is 1-D.
    """
    tolerance = 10 ** (-10)

    np.random.seed(123)
    X = np.random.randn(100, 10)
    target_shape = (100, 2) if multi_output else (100,)
    y = np.random.randn(*target_shape)

    forest = RandomForestRegressor(n_estimators=10, random_state=123)
    model = OutSamplerTransformer(forest, cv=10)
    model.fit(X, y)

    direct_pred = model.model.predict(X)
    transformed = model.transform(X)

    # The wrapper must not be reported as a classifier nor as a regressor.
    assert not is_classifier(model)
    assert not is_regressor(model)

    if not multi_output:
        # Single target: the transformed output is one column.
        assert np.abs(direct_pred - transformed[:, 0]).max() <= tolerance
        assert transformed.shape == (100, 1)
        assert model.get_feature_names() == ["RandomForestRegressor__target"]
        return

    # Two targets: compare column by column.
    for col in range(2):
        gap = np.abs(direct_pred[:, col] - transformed[:, col]).max()
        assert gap <= tolerance
    assert transformed.shape == (100, 2)

    expected_names = [
        "output%d__RandomForestRegressor__target" % d
        for d in range(y.shape[1])
    ]
    assert model.get_feature_names() == expected_names
Esempio n. 2
0
def test_OutSamplerTransformer_classifier():
    """Check OutSamplerTransformer wrapped around a RandomForestClassifier.

    Two scenarios are exercised on the same feature matrix:

    * binary 0/1 labels: ``transform`` yields a single column equal to
      column 1 of ``model.model.predict_proba(X)``;
    * three string labels ("a", "b", "c"): ``transform`` yields all three
      probability columns.

    In both cases the feature names are ``RandomForestClassifier__<class>``.
    """
    tolerance = 10 ** (-10)

    def fit_and_predict(features, target):
        # Build a fresh wrapper, fit it, then collect both outputs
        # (inner predict_proba first, transform second).
        wrapper = OutSamplerTransformer(
            RandomForestClassifier(n_estimators=10, random_state=123))
        wrapper.fit(features, target)
        inner = wrapper.model.predict_proba(features)
        outer = wrapper.transform(features)
        return wrapper, inner, outer

    np.random.seed(123)
    X = np.random.randn(100, 10)

    # --- binary integer labels ---
    y = 1 * (np.random.randn(100) > 0)
    model, inner_proba, transformed = fit_and_predict(X, y)

    # The wrapper must not be reported as a classifier nor as a regressor.
    assert not is_classifier(model)
    assert not is_regressor(model)

    assert np.abs(inner_proba[:, 1] - transformed[:, 0]).max() <= tolerance
    assert transformed.shape == (100, 1)

    assert model.get_feature_names() == ["RandomForestClassifier__1"]

    # --- three string labels ---
    label_pool = np.array(["a", "b", "c"])
    y = label_pool[np.random.randint(0, 3, 100)]
    model, inner_proba, transformed = fit_and_predict(X, y)

    assert inner_proba.shape == (100, 3)
    assert transformed.shape == (100, 3)

    assert np.abs(inner_proba - transformed).max() <= tolerance

    expected_names = [
        "RandomForestClassifier__a",
        "RandomForestClassifier__b",
        "RandomForestClassifier__c",
    ]
    assert model.get_feature_names() == expected_names
Esempio n. 3
0
def test_OutSamplerTransformer_regressor():
    """Check OutSamplerTransformer wrapped around a RandomForestRegressor.

    With a 1-D target, ``transform(X)`` must be a ``(100, 1)`` array whose
    single column matches ``model.model.predict(X)`` within 1e-10, and the
    only feature name must be ``RandomForestRegressor__target``.
    """
    tolerance = 10 ** (-10)
    n_rows, n_cols = 100, 10

    np.random.seed(123)
    X = np.random.randn(n_rows, n_cols)
    y = np.random.randn(n_rows)

    base_estimator = RandomForestRegressor(n_estimators=10, random_state=123)
    model = OutSamplerTransformer(base_estimator, cv=10)
    model.fit(X, y)

    inner_prediction = model.model.predict(X)
    transformed = model.transform(X)

    # The wrapper must not be reported as a classifier nor as a regressor.
    assert not is_classifier(model)
    assert not is_regressor(model)

    largest_gap = np.abs(inner_prediction - transformed[:, 0]).max()
    assert largest_gap <= tolerance
    assert transformed.shape == (100, 1)

    assert model.get_feature_names() == ["RandomForestRegressor__target"]
Esempio n. 4
0
def test_OutSamplerTransformer_classifier(multi_output):
    """Check OutSamplerTransformer wrapped around a RandomForestClassifier.

    Two rounds are run on the same feature matrix:

    1. binary 0/1 labels -- one transformed column per output, equal to
       column 1 of the matching ``predict_proba`` result;
    2. string labels "a"/"b"/"c" -- three transformed columns per output.

    Parameters
    ----------
    multi_output : bool
        When truthy the target has two columns, otherwise it is 1-D.
    """
    tolerance = 10 ** (-10)

    def new_model():
        # Same estimator configuration for both rounds.
        return OutSamplerTransformer(
            RandomForestClassifier(n_estimators=10, random_state=123))

    np.random.seed(123)
    X = np.random.randn(100, 10)

    # ---- Round 1: binary integer labels ----
    noise = np.random.randn(100, 2) if multi_output else np.random.randn(100)
    y = 1 * (noise > 0)

    model = new_model()
    model.fit(X, y)

    inner_proba = model.model.predict_proba(X)
    transformed = model.transform(X)

    # The wrapper must not be reported as a classifier nor as a regressor.
    assert not is_classifier(model)
    assert not is_regressor(model)

    if multi_output:
        # inner_proba is indexed per output; transformed column k must match
        # the class-1 probability of output k.
        for out_idx in range(2):
            gap = np.abs(inner_proba[out_idx][:, 1]
                         - transformed[:, out_idx]).max()
            assert gap <= tolerance
        expected_names = [
            "output%d__RandomForestClassifier__1" % d
            for d in range(y.shape[1])
        ]
    else:
        assert np.abs(inner_proba[:, 1] - transformed[:, 0]).max() <= tolerance
        expected_names = ["RandomForestClassifier__1"]

    assert transformed.shape == (100, 1 + 1 * multi_output)
    assert model.get_feature_names() == expected_names

    # ---- Round 2: string labels ----
    label_pool = np.array(["a", "b", "c"])
    if multi_output:
        y = label_pool[np.random.randint(0, 3, 200).reshape((100, 2))]
    else:
        y = label_pool[np.random.randint(0, 3, 100)]

    model = new_model()
    model.fit(X, y)

    inner_proba = model.model.predict_proba(X)
    transformed = model.transform(X)

    if not multi_output:
        assert inner_proba.shape == (100, 3)
        assert transformed.shape == (100, 3)

        assert np.abs(inner_proba - transformed).max() <= tolerance
        assert model.get_feature_names() == [
            "RandomForestClassifier__a",
            "RandomForestClassifier__b",
            "RandomForestClassifier__c",
        ]
        return

    # Multi-output: one probability array per output, laid side by side in
    # the transformed matrix (3 columns each).
    assert isinstance(inner_proba, list)
    assert len(inner_proba) == y.shape[1]
    assert transformed.shape == (100, 6)

    assert np.abs(inner_proba[0] - transformed[:, 0:3]).max() <= tolerance
    assert np.abs(inner_proba[1] - transformed[:, 3:]).max() <= tolerance