Example #1
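These snippets are pytest tests written against the scikit-multiflow streaming API (or a close fork of it; helper names such as ConceptDriftStreamGenerator, get_next_n_samples and run_prequential_supervised vary between versions). Each example is shown without its module header. A minimal sketch of the imports the examples rely on follows; the module paths are assumptions based on scikit-multiflow's layout, not taken from the original file.

import os
from array import array

import numpy as np
import pytest
from sklearn import __version__ as sklearn_version
from sklearn.base import clone, is_classifier
from sklearn.naive_bayes import GaussianNB

# Module paths below depend on the library version in use. In mainline
# scikit-multiflow the drift stream is ConceptDriftStream; the
# ConceptDriftStreamGenerator name used below comes from a fork.
from skmultiflow.data import SEAGenerator, RandomTreeGenerator
from skmultiflow.bayes import NaiveBayes
from skmultiflow.lazy import KNNClassifier, KNNADWINClassifier, SAMKNNClassifier
from skmultiflow.trees import (HoeffdingTreeClassifier,
                               HoeffdingAdaptiveTreeClassifier,
                               ExtremelyFastDecisionTreeClassifier)
from skmultiflow.anomaly_detection import HalfSpaceTrees
from skmultiflow.neural_networks import PerceptronMask
from skmultiflow.prototype import RobustSoftLearningVectorQuantization as RSLVQ
from skmultiflow.meta import (LeveragingBaggingClassifier,
                              OzaBaggingADWINClassifier,
                              OnlineCSB2Classifier,
                              OnlineRUSBoostClassifier,
                              DynamicWeightedMajorityClassifier,
                              AdditiveExpertEnsembleClassifier,
                              LearnPPNSEClassifier)
from skmultiflow.utils import calculate_object_size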
def test_half_space_trees(test_path):
    stream = SEAGenerator(classification_function=0,
                          noise_percentage=0.1,
                          random_state=1)

    learner = HalfSpaceTrees(n_estimators=13,
                             size_limit=75,
                             anomaly_threshold=0.90,
                             depth=10,
                             random_state=5)

    cnt = 0
    max_samples = 5000
    y_pred = array('i')
    y_proba = []
    wait_samples = 500

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Scale inputs to [0, 1] (SEA features lie in [0, 10])
        X = X / 10
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X)
        cnt += 1

    expected_predictions = array('i', [1, 0, 0, 0, 1, 0, 0, 1, 0])
    assert np.alltrue(y_pred == expected_predictions)
    test_file = os.path.join(test_path, 'test_half_space_trees.npy')
    expected_proba = np.load(test_file)
    assert np.allclose(y_proba, expected_proba)
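All of the examples below follow the same prequential (interleaved test-then-train) pattern used above: every wait_samples instances the current model first predicts the incoming sample, then that same sample is passed to partial_fit, and the collected predictions and probabilities are compared against hard-coded expected values.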
def test_clone():
    stream = SEAGenerator(random_state=1)

    learner = NaiveBayes()

    cnt = 0
    max_samples = 5000
    y_pred = array('i')
    X_batch = []
    y_batch = []
    y_proba = []
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y, classes=[0, 1])
        cnt += 1

    cloned = clone(learner)

    assert learner._observed_class_distribution != {} and cloned._observed_class_distribution == {}
Example #3
def test_perceptron(test_path):
    stream = SEAGenerator(random_state=1)

    learner = PerceptronMask(random_state=1)

    cnt = 0
    max_samples = 5000
    y_pred = array('i')
    X_batch = []
    y_batch = []
    y_proba = []
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y, classes=[0, 1])
        cnt += 1

    expected_predictions = array('i', [
        1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0,
        1
    ])
    assert np.alltrue(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'data_perceptron_proba.npy')
    y_proba_expected = np.load(test_file)
    assert np.allclose(y_proba, y_proba_expected)

    expected_info = "PerceptronMask(alpha=0.0001, class_weight=None, early_stopping=False, " \
                    "eta0=1.0, fit_intercept=True, max_iter=1000, n_iter_no_change=5, " \
                    "n_jobs=None, penalty=None, random_state=1, shuffle=True, tol=0.001, " \
                    "validation_fraction=0.1, verbose=0, warm_start=False)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info

    # Coverage tests
    learner.reset()
    if not sklearn_version.startswith("0.21"):
        learner.fit(X=np.asarray(X_batch[:4500]),
                    y=np.asarray(y_batch[:4500], dtype=int))
    else:
        # Root cause of failure (TypeError: an integer is required) is in the fit() method
        # in sklearn 0.21.0. This is a workaround until a fix is made available in sklearn
        learner.partial_fit(X=np.asarray(X_batch[:4500]),
                            y=np.asarray(y_batch[:4500]),
                            classes=stream.target_values)
    learner.predict(X=X_batch[4501:])  # Run for coverage

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray

def test_leverage_bagging():
    stream = SEAGenerator(classification_function=1,
                          noise_percentage=0.067,
                          random_state=112)
    knn = KNNClassifier(n_neighbors=8,
                        leaf_size=40,
                        max_window_size=2000)
    learner = LeveragingBaggingClassifier(base_estimator=knn,
                                          n_estimators=3,
                                          random_state=112)
    first = True

    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=[0, 1])
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1

    performance = correct_predictions / len(predictions)
    expected_predictions = [1, 0, 1, 0, 0, 0, 0, 1, 0, 1,
                            0, 0, 1, 0, 1, 1, 1, 0, 1, 0,
                            0, 1, 0, 0, 1, 1, 0, 1, 0, 1,
                            1, 1, 1, 1, 0, 0, 1, 1, 1, 0,
                            0, 1, 1, 0, 1, 0, 0, 1, 1]
    assert np.alltrue(predictions == expected_predictions)

    expected_performance = 0.8571428571428571
    assert np.isclose(expected_performance, performance)

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray

    expected_info = "LeveragingBaggingClassifier(base_estimator=KNNClassifier(leaf_size=40, " \
                    "max_window_size=2000, metric='euclidean', n_neighbors=8), " \
                    "delta=0.002, enable_code_matrix=False, leverage_algorithm='leveraging_bag'," \
                    " n_estimators=3, random_state=112, w=6)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info

def test_online_csb2():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    nb = NaiveBayes()
    learner = OnlineCSB2Classifier(base_estimator=nb,
                                   n_estimators=3,
                                   cost_positive=1,
                                   cost_negative=0.9,
                                   random_state=112)
    first = True

    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=[0, 1])
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1
    performance = correct_predictions / len(predictions)
    expected_predictions = [
        1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0,
        1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        1
    ]

    expected_correct_predictions = 43
    expected_performance = 0.8775510204081632

    assert np.alltrue(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray

    expected_info = "OnlineCSB2Classifier(base_estimator=NaiveBayes(nominal_attributes=None), cost_negative=0.9, " \
                    "cost_positive=1, drift_detection=True, n_estimators=3, random_state=112)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info
Example #6
def test_naive_bayes(test_path):
    stream = SEAGenerator(random_state=1)

    learner = NaiveBayes()

    cnt = 0
    max_samples = 5000
    y_pred = array('i')
    X_batch = []
    y_batch = []
    y_proba = []
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y, classes=[0, 1])
        cnt += 1

    expected_predictions = array('i', [1, 1, 1, 0, 1, 1, 1, 0, 0, 1,
                                       1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                       1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
                                       0, 0, 1, 1, 0, 0, 1, 0, 1, 1,
                                       1, 1, 0, 1, 0, 0, 1, 1, 1])

    assert np.alltrue(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'data_naive_bayes_proba.npy')
    y_proba_expected = np.load(test_file)
    assert np.allclose(y_proba, y_proba_expected)

    expected_info = 'NaiveBayes(nominal_attributes=None)'
    assert learner.get_info() == expected_info

    learner.reset()
    learner.fit(X=np.array(X_batch[:4500]), y=np.array(y_batch[:4500]))

    expected_score = 0.9378757515030061
    assert np.isclose(expected_score, learner.score(X=np.array(X_batch[4501:]),
                                                    y=np.array(y_batch[4501:])))

    assert is_classifier(learner)

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray

def test_oza_bagging_adwin():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    knn = KNNClassifier(n_neighbors=8, leaf_size=40, max_window_size=2000)
    learner = OzaBaggingADWINClassifier(base_estimator=knn,
                                        n_estimators=3,
                                        random_state=112)
    first = True

    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=[0, 1])
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1
    performance = correct_predictions / len(predictions)
    expected_predictions = [
        1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0,
        1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        1
    ]
    assert np.alltrue(predictions == expected_predictions)

    expected_performance = 0.8979591836734694
    assert np.isclose(expected_performance, performance)

    expected_correct_predictions = 44
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray

    expected_info = "OzaBaggingADWINClassifier(base_estimator=KNNClassifier(leaf_size=40, " \
                    "max_window_size=2000, metric='euclidean', n_neighbors=8), n_estimators=3, " \
                    "random_state=112)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info
Example #8
def test_knn_adwin():
    stream = ConceptDriftStreamGenerator(stream=SEAGenerator(random_state=1),
                                         drift_stream=SEAGenerator(
                                             random_state=2,
                                             classification_function=2),
                                         random_state=1,
                                         position=250,
                                         width=10)

    learner = KNNADWINClassifier(n_neighbors=8,
                                 leaf_size=40,
                                 max_window_size=200)

    cnt = 0
    max_samples = 1000
    predictions = array('i')
    correct_predictions = 0
    wait_samples = 20

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [
        1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0,
        1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1,
        1
    ])
    assert np.alltrue(predictions == expected_predictions)

    expected_correct_predictions = 46
    assert correct_predictions == expected_correct_predictions

    learner.reset()
    assert learner.data_window.size == 0

    expected_info = "KNNADWINClassifier(leaf_size=40, max_window_size=200, " \
                    "metric='euclidean', n_neighbors=8)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray

def test_hoeffding_adaptive_tree_nb(test_path):
    stream = ConceptDriftStreamGenerator(
        stream=SEAGenerator(random_state=1, noise_percentage=0.05),
        drift_stream=SEAGenerator(random_state=2,
                                  classification_function=2,
                                  noise_percentage=0.05),
        random_state=1,
        position=250,
        width=10)

    learner = HoeffdingAdaptiveTreeClassifier(leaf_prediction='nb',
                                              random_state=1)

    cnt = 0
    max_samples = 1000
    y_pred = array('i')
    y_proba = []
    wait_samples = 20

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [
        1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0,
        1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1,
        1
    ])
    assert np.alltrue(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'test_hoeffding_adaptive_tree_nb.npy')
    data = np.load(test_file)
    assert np.allclose(y_proba, data)

    expected_info = "HoeffdingAdaptiveTreeClassifier(binary_split=False, bootstrap_sampling=True, grace_period=200, " \
                    "leaf_prediction='nb', max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0, " \
                    "no_preprune=False, nominal_attributes=None, random_state=1, remove_poor_atts=False, " \
                    "split_confidence=1e-07, split_criterion='info_gain', stop_mem_management=False, tie_threshold=0.05)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray
Example #10
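This example and several later ones call get_next_n_samples, a helper that is not part of this listing. A plausible sketch, assuming it simply batches the next_sample calls seen in the earlier examples:

def get_next_n_samples(stream, n):
    # Assumed helper: draw n instances from a stream and stack them into arrays.
    X_batch, y_batch = [], []
    for _ in range(n):
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
    return np.asarray(X_batch), np.asarray(y_batch)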
def test_hoeffding_tree_model_information():
    stream = SEAGenerator(random_state=1, noise_percentage=0.05)
    X, y = get_next_n_samples(stream, 5000)

    nominal_attr_idx = [x for x in range(5, stream.n_features)]
    learner = HoeffdingTreeClassifier(nominal_attributes=nominal_attr_idx)

    learner.partial_fit(X, y, classes=[0, 1])

    expected_info = {
        'Tree size (nodes)': 5,
        'Tree size (leaves)': 3,
        'Active learning nodes': 3,
        'Tree depth': 2,
        'Active leaf byte size estimate': 0.0,
        'Inactive leaf byte size estimate': 0.0,
        'Byte size estimate overhead': 1.0
    }

    observed_info = learner.model_measurements
    for k in expected_info:
        assert k in observed_info
        assert expected_info[k] == observed_info[k]

    expected_description = "if Attribute 0 <= 4.549969620513424:\n" \
                            "  if Attribute 1 <= 5.440182925299016:\n" \
                            "    Leaf = Class 0 | {0: 345.54817975126275, 1: 44.43855503614928}\n" \
                            "  if Attribute 1 > 5.440182925299016:\n" \
                            "    Leaf = Class 1 | {0: 54.451820248737235, 1: 268.5614449638507}\n" \
                            "if Attribute 0 > 4.549969620513424:\n" \
                            "  Leaf = Class 1 | {0: 390.5845685762964, 1: 2372.3747376855454}\n" \

    assert expected_description == learner.get_model_description()

def test_extremely_fast_decision_tree_coverage():
    # Cover memory management
    max_size_kb = 20
    stream = SEAGenerator(random_state=1, noise_percentage=0.05)
    X, y = get_next_n_samples(stream, 5000)

    # The unconstrained model exceeds 50 kB
    learner = ExtremelyFastDecisionTreeClassifier(leaf_prediction='mc',
                                                  memory_estimate_period=200,
                                                  max_byte_size=max_size_kb * 2**10,
                                                  min_samples_reevaluate=2500)

    learner.partial_fit(X, y, classes=[0, 1])
    assert calculate_object_size(learner, 'kB') <= max_size_kb

    learner.reset()

    # Cover nominal attribute observer
    stream = RandomTreeGenerator(tree_random_state=23,
                                 sample_random_state=12,
                                 n_classes=2,
                                 n_cat_features=2,
                                 n_categories_per_cat_feature=4,
                                 n_num_features=1,
                                 max_tree_depth=30,
                                 min_leaf_depth=10,
                                 fraction_leaves_per_level=0.45)
    X, y = get_next_n_samples(stream, 5000)
    learner = ExtremelyFastDecisionTreeClassifier(
        leaf_prediction='nba', nominal_attributes=[i for i in range(1, 9)])
    learner.partial_fit(X, y, classes=[0, 1])

def test_dynamic_weighted_majority():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)

    learner = DynamicWeightedMajorityClassifier(3,
                                                NaiveBayes(),
                                                beta=0.5,
                                                theta=0.01)

    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0
    first = True

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=[0, 1])
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1
    performance = correct_predictions / len(predictions)
    expected_predictions = [
        1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0,
        1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        1
    ]
    expected_correct_predictions = 44
    expected_performance = 0.8979591836734694

    assert np.alltrue(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray

    expected_info = 'DynamicWeightedMajorityClassifier(base_estimator=NaiveBayes(nominal_attributes=None),\n' \
                    '                                  beta=0.5, n_estimators=3, period=50,\n' \
                    '                                  theta=0.01)'
    assert learner.get_info() == expected_info

def test_online_rus_3():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    nb = NaiveBayes()
    learner = OnlineRUSBoostClassifier(base_estimator=nb,
                                       n_estimators=3,
                                       sampling_rate=5,
                                       algorithm=3,
                                       random_state=112)
    first = True

    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=[0, 1])
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1
    performance = correct_predictions / len(predictions)
    expected_predictions = [
        1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1,
        1
    ]

    expected_correct_predictions = 35
    expected_performance = 0.7142857142857143

    assert np.alltrue(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray

def test_sam_knn_coverage():

    stream = SEAGenerator(random_state=1)

    hyperParams = {
        'maxSize': 50,
        'n_neighbors': 3,
        'weighting': 'uniform',
        'stm_size_option': 'maxACC',
        'min_stm_size': 10,
        'use_ltm': True
    }

    learner = SAMKNNClassifier(n_neighbors=hyperParams['n_neighbors'],
                               max_window_size=hyperParams['maxSize'],
                               weighting=hyperParams['weighting'],
                               stm_size_option=hyperParams['stm_size_option'],
                               min_stm_size=hyperParams['min_stm_size'],
                               use_ltm=hyperParams['use_ltm'])

    cnt = 0
    max_samples = 1000
    predictions = array('i')

    wait_samples = 20

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [
        1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1,
        1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1,
        1
    ])
    assert np.alltrue(predictions == expected_predictions)

    expected_info = "SAMKNNClassifier(ltm_size=0.4, max_window_size=None, min_stm_size=10, n_neighbors=3, " \
                    "stm_size_option='maxACC', use_ltm=True, weighting='uniform')"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info
Example #15
def test_additive_expert_ensemble_weakest():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)

    learner = AdditiveExpertEnsembleClassifier(3, NaiveBayes(), beta=0.5, gamma=0.1,
                                               pruning='weakest')

    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0
    first = True

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=[0, 1])
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1
    performance = correct_predictions / len(predictions)

    expected_predictions = [1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1,
                            0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
                            0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1]
    expected_correct_predictions = 45
    expected_performance = 0.9183673469387755

    assert np.alltrue(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray

    expected_info = "AdditiveExpertEnsembleClassifier(base_estimator=NaiveBayes(nominal_attributes=None),\n" \
                    "                                 beta=0.5, gamma=0.1, n_estimators=3,\n" \
                    "                                 pruning='weakest')"
    assert learner.get_info() == expected_info

def test_sam_knn():

    stream = SEAGenerator(random_state=1)

    hyperParams = {
        'maxSize': 1000,
        'nNeighbours': 5,
        'knnWeights': 'distance',
        'STMSizeAdaption': 'maxACCApprox',
        'use_ltm': False
    }

    learner = SAMKNNClassifier(n_neighbors=hyperParams['nNeighbours'],
                               max_window_size=hyperParams['maxSize'],
                               weighting=hyperParams['knnWeights'],
                               stm_size_option=hyperParams['STMSizeAdaption'],
                               use_ltm=hyperParams['use_ltm'])

    cnt = 0
    max_samples = 5000
    predictions = array('i')

    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [
        1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0,
        1
    ])

    assert np.alltrue(predictions == expected_predictions)

    assert type(learner.predict(X)) == np.ndarray

    with pytest.raises(NotImplementedError):
        learner.predict_proba(X)

def test_sea_generator(test_path):
    stream = SEAGenerator(classification_function=2,
                          random_state=112,
                          balance_classes=False,
                          noise_percentage=0.28)

    # Load test data corresponding to the first 10 instances
    test_file = os.path.join(test_path, 'sea_stream.npz')
    data = np.load(test_file)
    X_expected = data['X']
    y_expected = data['y']

    for j in range(0, 10):
        X, y = stream.next_sample()
        assert np.alltrue(np.isclose(X, X_expected[j]))
        assert np.alltrue(np.isclose(y[0], y_expected[j]))

    expected_info = "SEAGenerator(balance_classes=False, classification_function=2, noise_percentage=0.28, random_state=112)"
    assert stream.get_info() == expected_info
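The next test drives the evaluation through run_prequential_supervised, another helper that is not included in this listing. A plausible sketch, mirroring the manual prequential loops above; the warm-up handling is a guess, chosen so that max_samples=2000 and n_wait=40 yield the 50 expected predictions:

def run_prequential_supervised(stream, learner, max_samples, n_wait,
                               target_values=None, y_expected=None):
    # Assumed helper: prequential (test-then-train) evaluation.
    # Warm up on the first sample so the model can predict from the start.
    X, y = stream.next_sample()
    learner.partial_fit(X, y, classes=target_values)
    y_pred = array('i')
    for cnt in range(max_samples - 1):
        X, y = stream.next_sample()
        # Test every n_wait samples
        if cnt % n_wait == 0:
            y_pred.append(learner.predict(X)[0])
        learner.partial_fit(X, y)
    if y_expected is not None:
        assert np.alltrue(np.asarray(y_pred) == np.asarray(y_expected))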
def test_leverage_bagging_half():
    knn = KNNClassifier(n_neighbors=8, leaf_size=40, max_window_size=2000)
    # leveraging_bag_half
    learner = LeveragingBaggingClassifier(base_estimator=knn,
                                          n_estimators=3,
                                          random_state=112,
                                          leverage_algorithm='leveraging_bag_half')

    y_expected = np.asarray([0, 1, 1, 0, 1, 0, 1, 0, 1, 0,
                             1, 0, 0, 0, 1, 0, 1, 1, 1, 1,
                             1, 1, 1, 1, 0, 1, 0, 1, 1, 0,
                             0, 0, 1, 1, 1, 0, 1, 1, 0, 0,
                             1, 0, 0, 1, 0, 0, 0, 1, 1, 0], dtype=int)

    run_prequential_supervised(SEAGenerator(classification_function=1,
                                            noise_percentage=0.067,
                                            random_state=112),
                               learner, max_samples=2000, n_wait=40,
                               target_values=[0, 1], y_expected=y_expected)

def test_leverage_bagging_coverage():
    # Invalid leverage_algorithm
    with pytest.raises(ValueError):
        LeveragingBaggingClassifier(leverage_algorithm='invalid')

    estimator = LeveragingBaggingClassifier(random_state=4321)
    stream = SEAGenerator(random_state=4321)
    X, y = stream.next_sample()

    # classes not passed in partial_fit
    with pytest.raises(ValueError):
        estimator.partial_fit(X, y, classes=None)
    estimator.partial_fit(X, y, classes=[0, 1])
    # different observed classes
    with pytest.raises(ValueError):
        estimator.partial_fit(X, y, classes=[0, 1] + [-1])
    # Invalid leverage_algorithm, changed after initialization
    with pytest.raises(RuntimeError):
        estimator.leverage_algorithm = 'invalid'
        estimator.partial_fit(X, y, classes=[0, 1])

    # Reset ensemble
    estimator.reset()
    assert estimator.classes is None
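test_learn_nse below relies on run_classifier, a helper that is also missing from this listing. Its shape can be reconstructed from the second half of that test and from the expected get_info() output (window_size=250, n_estimators=15, pruning=None); the chunk count here is an assumption chosen so that 2000 samples are tested (1754 / 2000 = 0.877):

def run_classifier(estimator, stream, pruning=None, ensemble_size=15):
    # Assumed helper: chunk-based train/test of LearnPPNSEClassifier.
    classifier = LearnPPNSEClassifier(base_estimator=estimator,
                                      window_size=250,
                                      pruning=pruning,
                                      n_estimators=ensemble_size)
    m = 250
    corrects, sample_count = 0, 0
    # Pre-train on the first chunk
    X, y = get_next_n_samples(stream, m)
    classifier.partial_fit(X, y, classes=[0, 1])
    for _ in range(8):  # 8 chunks of 250 samples -> 2000 tested samples
        X, y = get_next_n_samples(stream, m)
        pred = classifier.predict(X)
        classifier.partial_fit(X, y)
        corrects += np.sum(y == pred)
        sample_count += m
    return corrects, corrects / sample_count, classifier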
def test_learn_nse():
    stream = SEAGenerator(random_state=2212)

    estimator = GaussianNB()

    corrects, acc, classifier = run_classifier(estimator, stream)

    expected_correct_predictions = 1754
    expected_acc = 0.877

    assert np.isclose(expected_acc, acc)
    assert corrects == expected_correct_predictions

    # Test reset method
    classifier.reset()
    assert len(classifier.ensemble) == 0
    assert len(classifier.ensemble_weights) == 0
    assert len(classifier.bkts) == 0
    assert len(classifier.wkts) == 0
    assert len(classifier.X_batch) == 0
    assert len(classifier.y_batch) == 0

    expected_info = 'LearnPPNSEClassifier(base_estimator=GaussianNB(), crossing_point=10, ' \
                    'n_estimators=15, pruning=None, slope=0.5, window_size=250)'
    info = " ".join([line.strip() for line in classifier.get_info().split()])
    assert info == expected_info
    # test pruning error
    corrects, acc, classifier = run_classifier(estimator,
                                               stream,
                                               pruning="error",
                                               ensemble_size=5)

    expected_correct_predictions = 1751
    expected_acc = 0.8755

    assert np.isclose(expected_acc, acc)
    assert corrects == expected_correct_predictions

    # test pruning age
    corrects, acc, classifier = run_classifier(estimator,
                                               stream,
                                               pruning="age",
                                               ensemble_size=5)

    expected_correct_predictions = 1774
    expected_acc = 0.887

    assert np.isclose(expected_acc, acc)
    assert corrects == expected_correct_predictions

    stream = SEAGenerator(random_state=2212)

    estimator = HoeffdingTreeClassifier()

    classifier = LearnPPNSEClassifier(base_estimator=estimator)

    # Keeping track of sample count and correct prediction count
    sample_count = 0
    corrects = 0

    m = 250
    # Pre-training the classifier
    X, y = get_next_n_samples(stream, m)
    classifier.partial_fit(X, y, classes=[0, 1])

    for i in range(10):
        X, y = get_next_n_samples(stream, m)
        pred = classifier.predict(X)
        classifier.partial_fit(X, y)

        if pred is not None:
            corrects += np.sum(y == pred)
        sample_count += m

    acc = corrects / sample_count
    expected_acc = 0.9436
    assert acc == expected_acc

def test_knn():
    stream = SEAGenerator(random_state=1)

    learner = KNNClassifier(n_neighbors=8, max_window_size=2000, leaf_size=40)
    cnt = 0
    max_samples = 5000
    predictions = array('i')
    correct_predictions = 0
    wait_samples = 100
    X_batch = []
    y_batch = []

    while cnt < max_samples:
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [
        1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0,
        1
    ])
    assert np.alltrue(predictions == expected_predictions)

    expected_correct_predictions = 49
    assert correct_predictions == expected_correct_predictions

    expected_info = "KNNClassifier(leaf_size=40, max_window_size=2000, " \
                    "metric='euclidean', n_neighbors=8)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info

    learner.reset()
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info

    X_batch = np.array(X_batch)
    y_batch = np.array(y_batch)
    learner.fit(X_batch[:4500], y_batch[:4500], classes=[0, 1])
    predictions = learner.predict(X_batch[4501:4550])

    expected_predictions = array('i', [
        1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1,
        1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1,
        0
    ])
    assert np.alltrue(predictions == expected_predictions)

    correct_predictions = sum(predictions == y_batch[4501:4550])
    expected_correct_predictions = 49
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray
Example #22
def test_rslvq():
    stream = SEAGenerator(random_state=1)

    learner_adadelta = RSLVQ(gradient_descent='adadelta')
    learner_vanilla = RSLVQ(gradient_descent='vanilla')

    cnt = 0
    max_samples = 5000
    y_pred_vanilla = array('i')
    y_pred_adadelta = array('i')
    X_batch = []
    y_batch = []
    wait_samples = 100

    # Check if predicted labels are as expected
    while cnt < max_samples:
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred_vanilla.append(learner_vanilla.predict(X)[0])
            y_pred_adadelta.append(learner_adadelta.predict(X)[0])
        learner_adadelta.partial_fit(X, y, classes=[0, 1])
        learner_vanilla.partial_fit(X, y, classes=[0, 1])
        cnt += 1

    expected_predictions_vanilla = array('i', [
        1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1,
        0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0,
        1
    ])

    expected_predictions_adadelta = array('i', [
        1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0,
        1
    ])

    assert np.alltrue(y_pred_vanilla == expected_predictions_vanilla)
    assert np.alltrue(y_pred_adadelta == expected_predictions_adadelta)

    # Check get_info method
    expected_info = "RobustSoftLearningVectorQuantization(gamma=0.9, gradient_descent='vanilla',\n" \
                    "                                     initial_prototypes=None,\n" \
                    "                                     prototypes_per_class=1, random_state=None,\n" \
                    "                                     sigma=1.0)"

    assert learner_vanilla.get_info() == expected_info

    # Check reset method
    learner_vanilla.reset()
    learner_vanilla.fit(X=np.array(X_batch[:4500]), y=np.array(y_batch[:4500]))

    learner_adadelta.reset()
    learner_adadelta.fit(X=np.array(X_batch[:4500]),
                         y=np.array(y_batch[:4500]))

    # Check classifiers performance
    learner_w_init_ppt = RSLVQ(
        initial_prototypes=[[2.59922826, 2.57368134, 4.92501, 0],
                            [6.05801971, 6.01383352, 5.02135783, 1]],
        gradient_descent='adadelta')
    learner_w_init_ppt.fit(X=np.array(X_batch[:4500]),
                           y=np.array(y_batch[:4500]))

    expected_score_ppt = .9539078156312625
    assert np.isclose(
        expected_score_ppt,
        learner_w_init_ppt.score(X=np.array(X_batch[4501:]),
                                 y=np.array(y_batch[4501:])))

    expected_score_vanilla = .8897795591182365
    assert np.isclose(
        expected_score_vanilla,
        learner_vanilla.score(X=np.array(X_batch[4501:]),
                              y=np.array(y_batch[4501:])))

    expected_score_adadelta = .9458917835671342
    assert np.isclose(
        expected_score_adadelta,
        learner_adadelta.score(X=np.array(X_batch[4501:]),
                               y=np.array(y_batch[4501:])))

    # Check types
    assert is_classifier(learner_vanilla)
    assert is_classifier(learner_adadelta)

    assert type(learner_vanilla.predict(X)) == np.ndarray
    assert type(learner_adadelta.predict(X)) == np.ndarray

    # Check properties after learning
    expected_prototypes = np.array([[2.59922826, 2.57368134, 4.92501],
                                    [6.05801971, 6.01383352, 5.02135783]])

    assert np.allclose(learner_adadelta.prototypes, expected_prototypes)

    expected_prototypes_classes = np.array([0, 1])

    assert np.allclose(learner_adadelta.prototypes_classes,
                       expected_prototypes_classes)

    expected_class_labels = np.array([0, 1])

    assert np.allclose(learner_adadelta.class_labels, expected_class_labels)