Code Example #1
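A pytest test verifying that uniform label noise is reproducible: with the same `random_state`, `flip_labels_uniform` and `UniformNoise.simulate_noise` (both with `exact=False`) return identical corrupted labels. `nl` is the parametrized noise level; `X`, `y`, and `SEED` are module-level constants.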
def test_random_state_uniform(nl):
    y1 = flip_labels_uniform(y, nl, random_state=SEED, exact=False)
    y2 = flip_labels_uniform(y, nl, random_state=SEED, exact=False)
    assert_array_equal(y1, y2)

    _, ya = UniformNoise(nl, random_state=SEED,
                         exact=False).simulate_noise(X, y)
    _, yb = UniformNoise(nl, random_state=SEED,
                         exact=False).simulate_noise(X, y)
    assert_array_equal(ya, yb)
    assert_array_equal(ya, y1)
Code Example #2
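Checks that `exact=True` flips exactly the requested fraction `nl` of labels, both through the `flip_labels_uniform` function and the `UniformNoise` transformer.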
def test_uniform_exact(nl):
    yn = flip_labels_uniform(y, nl, exact=True)
    assert (y != yn).sum() / len(y) == nl

    _, ya = UniformNoise(nl, random_state=SEED,
                         exact=True).simulate_noise(X, y)
    assert (y != ya).sum() / len(y) == nl
Code Example #3
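Stacks two noise simulators, `UniformNoise` and `CCNoise`, in front of a classifier inside a single `Pipeline` and makes sure it still fits and predicts.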
def test_stack_sim():
    p = Pipeline([
        ('a', UniformNoise(.3)),
        ('b', CCNoise(lcm)),
        ('c', DecisionTreeClassifier()),
    ])
    p.fit(X, y)
    p.predict(X)
Code Example #4
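An end-to-end workflow: a skclean `Pipeline` scales the features, drops low-variance ones, detects likely mislabeled samples with `KDN`, and hands them to `Filter` so an SVM is trained only on the samples kept; the pipeline is tuned with `GridSearchCV`, prefixed with `UniformNoise` to corrupt the labels first, and scored with 5-fold cross-validation.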
def test_example_two():
    from sklearn.preprocessing import StandardScaler
    from sklearn.feature_selection import VarianceThreshold
    from sklearn.svm import SVC
    from sklearn.model_selection import GridSearchCV, cross_val_score

    from skclean.simulate_noise import UniformNoise
    from skclean.detectors import KDN
    from skclean.handlers import Filter
    from skclean.pipeline import Pipeline, make_pipeline  # Importing from skclean, not sklearn
    from skclean.utils import load_data

    X, y = load_data('breast_cancer')

    clf = Pipeline([
        ('scale', StandardScaler()),  # Scale features
        ('feat_sel', VarianceThreshold(.2)),  # Feature selection
        ('detector', KDN()),  # Detect mislabeled samples
        ('handler', Filter(SVC())),  # Filter out likely mislabeled samples, then train an SVM
    ])

    clf_g = GridSearchCV(clf, {'detector__n_neighbors': [2, 5, 10]})
    n_clf_g = make_pipeline(UniformNoise(.3),
                            clf_g)  # Create label noise at the very first step

    print(cross_val_score(n_clf_g, X, y,
                          cv=5).mean())  # 5-fold cross validation
Code Example #5
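Test setup that enumerates detector/handler combinations two ways: inside a skclean pipeline (the detector as its own step, with `handler.detector = None`) and outside it (the detector attached directly to a cloned handler). `dummy`, `NOISE_HANDLERS`, `NOISE_DETECTORS`, and the opening lines of the first pipeline are defined earlier in the test module and are not shown in this excerpt.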
    ('b', StandardScaler()),
    ('c', VarianceThreshold(.2)),
    ('d', KDN()),
    ('e', skclean.handlers.SampleWeight(dummy))
])


# Inside Pipeline
tmp_Handlers = []
for h in NOISE_HANDLERS:
    if h.iterative:  # Exclude iterative handlers
        continue
    ch = clone(h)
    ch.detector = None
    tmp_Handlers.append(ch)
preli_steps = [UniformNoise(.2, random_state=SEED), StandardScaler()]
all_comb = product(NOISE_DETECTORS, tmp_Handlers)
INSIDE_PIPE = [make_pipeline(*preli_steps + list(comb)) for comb in all_comb]

# Outside Pipe
OUTSIDE_PIPE = []
for h in NOISE_HANDLERS:
    for d in NOISE_DETECTORS:
        ch, d = clone(h), clone(d)
        ch.detector = d
        if 'random_state' in ch.get_params():  # trying to avoid flaky tests
            ch.set_params(random_state=42)
        OUTSIDE_PIPE.append(ch)

ALL_COMBS = INSIDE_PIPE + OUTSIDE_PIPE
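
`ALL_COMBS` is presumably consumed by a parametrized smoke test; the sketch below shows one plausible shape, where the test name, the 2-fold split, and the bare length assertion are assumptions rather than part of the excerpt.

import pytest
from sklearn.model_selection import cross_val_score


# Hypothetical consumer of ALL_COMBS: each detector/handler combination
# should get through a short cross-validation run without raising.
@pytest.mark.parametrize('model', ALL_COMBS)
def test_all_combinations_smoke(model):  # test name is an assumption
    scores = cross_val_score(model, X, y, cv=2, error_score='raise')
    assert len(scores) == 2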
Code Example #6
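Runs a full simulate → scale → detect → filter pipeline through `cross_val_score` using the module's single-split `ShuffleSplit`, raising on any error.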
def test_pipe_cv():
    p = Pipeline([('a', UniformNoise(.3)), ('p', StandardScaler()),
                  ('d', KDN(n_neighbors=5)),
                  ('c', Filter(DecisionTreeClassifier()))])
    p = cross_val_score(p, X, y, cv=CV1, error_score='raise')
    print(p)
Code Example #7
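Same pipeline shape, but with `RandomForestDetector` (constructed with a custom `n_estimators`) as the noise detector.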
def test_detector_init():
    p = Pipeline([('a', UniformNoise(.3)), ('p', StandardScaler()),
                  ('d', RandomForestDetector(n_estimators=50)),
                  ('c', Filter(DecisionTreeClassifier()))])
    p.fit(X, y)
    p.predict(X)
Code Example #8
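Module-level setup shared by the tests above: imports, the iris dataset, a label confusion (noise transition) matrix for `CCNoise`, and a single-split `ShuffleSplit`, followed by a parametrized smoke test for both noise simulators and the stacked-simulator test shown in Code Example #3.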
import pytest

from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris, make_classification
from sklearn.model_selection import cross_val_score, GridSearchCV, ShuffleSplit

from skclean.detectors.neighbors import KDN
from skclean.detectors.ensemble import RandomForestDetector
from skclean.handlers.filters import Filter
from skclean.pipeline import Pipeline, make_pipeline
from skclean.simulate_noise import UniformNoise, CCNoise

X, y = load_iris(return_X_y=True)
lcm = [[.7, .1, .2], [.3, .6, .1], [.1, .21, .69]]
CV1 = ShuffleSplit(n_splits=1, test_size=.23)


@pytest.mark.parametrize('sim', [UniformNoise(.3), CCNoise(lcm)])
def test_noise_sim(sim):
    p = Pipeline([('s', sim), ('c', DecisionTreeClassifier())])
    p.fit(X, y)
    p.predict(X)

    p = make_pipeline(sim, DecisionTreeClassifier())
    p.fit(X, y)
    p.predict(X)


def test_stack_sim():
    p = Pipeline([
        ('a', UniformNoise(.3)),
        ('b', CCNoise(lcm)),
        ('c', DecisionTreeClassifier()),
    ])
    p.fit(X, y)
    p.predict(X)