import os
import copy
from array import array

import numpy as np
import pytest
from sklearn import __version__ as sklearn_version
from sklearn.base import clone, is_classifier
from sklearn.naive_bayes import GaussianNB

from skmultiflow.anomaly_detection import HalfSpaceTrees
from skmultiflow.bayes import NaiveBayes
# ConceptDriftStreamGenerator is assumed to be exported from skmultiflow.data,
# alongside the other stream generators used below.
from skmultiflow.data import (ConceptDriftStreamGenerator, RandomTreeGenerator,
                              SEAGenerator)
from skmultiflow.lazy import KNNADWINClassifier, KNNClassifier, SAMKNNClassifier
from skmultiflow.meta import (AdditiveExpertEnsembleClassifier,
                              DynamicWeightedMajorityClassifier,
                              LearnPPNSEClassifier, LeveragingBaggingClassifier,
                              OnlineCSB2Classifier, OnlineRUSBoostClassifier,
                              OzaBaggingADWINClassifier)
from skmultiflow.neural_networks import PerceptronMask
from skmultiflow.prototype import RobustSoftLearningVectorQuantization as RSLVQ
from skmultiflow.trees import (ExtremelyFastDecisionTreeClassifier,
                               HoeffdingAdaptiveTreeClassifier,
                               HoeffdingTreeClassifier)
from skmultiflow.utils import calculate_object_size


def test_half_space_trees(test_path):
    stream = SEAGenerator(classification_function=0, noise_percentage=0.1,
                          random_state=1)
    learner = HalfSpaceTrees(n_estimators=13, size_limit=75,
                             anomaly_threshold=0.90, depth=10, random_state=5)
    cnt = 0
    max_samples = 5000
    y_pred = array('i')
    y_proba = []
    wait_samples = 500

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Scale inputs between 0 and 1
        X = X / 10
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X)
        cnt += 1

    expected_predictions = array('i', [1, 0, 0, 0, 1, 0, 0, 1, 0])
    assert np.alltrue(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'test_half_space_trees.npy')
    expected_proba = np.load(test_file)
    assert np.allclose(y_proba, expected_proba)


def test_clone():
    stream = SEAGenerator(random_state=1)
    learner = NaiveBayes()
    cnt = 0
    max_samples = 5000
    y_pred = array('i')
    X_batch = []
    y_batch = []
    y_proba = []
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y, classes=[0, 1])
        cnt += 1

    cloned = clone(learner)
    assert learner._observed_class_distribution != {} \
        and cloned._observed_class_distribution == {}


def test_perceptron(test_path):
    stream = SEAGenerator(random_state=1)
    learner = PerceptronMask(random_state=1)
    cnt = 0
    max_samples = 5000
    y_pred = array('i')
    X_batch = []
    y_batch = []
    y_proba = []
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y, classes=[0, 1])
        cnt += 1

    expected_predictions = array('i', [
        1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1,
        1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1
    ])
    assert np.alltrue(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'data_perceptron_proba.npy')
    y_proba_expected = np.load(test_file)
    assert np.allclose(y_proba, y_proba_expected)

    expected_info = "PerceptronMask(alpha=0.0001, class_weight=None, early_stopping=False, " \
                    "eta0=1.0, fit_intercept=True, max_iter=1000, n_iter_no_change=5, " \
                    "n_jobs=None, penalty=None, random_state=1, shuffle=True, tol=0.001, " \
                    "validation_fraction=0.1, verbose=0, warm_start=False)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info

    # Coverage tests
    learner.reset()
    if not sklearn_version.startswith("0.21"):
        learner.fit(X=np.asarray(X_batch[:4500]),
                    y=np.asarray(y_batch[:4500], dtype=int))
    else:
        # Root cause of failure (TypeError: an integer is required) is in the
        # fit() method in sklearn 0.21.0. This is a workaround until a fix is
        # made available in sklearn.
        learner.partial_fit(X=np.asarray(X_batch[:4500]),
                            y=np.asarray(y_batch[:4500]),
                            classes=stream.target_values)
    learner.predict(X=X_batch[4501:])   # Run for coverage

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray


def test_leverage_bagging():
    stream = SEAGenerator(classification_function=1, noise_percentage=0.067,
                          random_state=112)
    knn = KNNClassifier(n_neighbors=8, leaf_size=40, max_window_size=2000)
    learner = LeveragingBaggingClassifier(base_estimator=knn, n_estimators=3,
                                          random_state=112)
    first = True
    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=[0, 1])
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1
    performance = correct_predictions / len(predictions)

    expected_predictions = [1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1,
                            0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1,
                            0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1]
    assert np.alltrue(predictions == expected_predictions)

    expected_performance = 0.8571428571428571
    assert np.isclose(expected_performance, performance)

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray

    expected_info = "LeveragingBaggingClassifier(base_estimator=KNNClassifier(leaf_size=40, " \
                    "max_window_size=2000, metric='euclidean', n_neighbors=8), " \
                    "delta=0.002, enable_code_matrix=False, leverage_algorithm='leveraging_bag'," \
                    " n_estimators=3, random_state=112, w=6)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info


def test_online_csb2():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    nb = NaiveBayes()
    learner = OnlineCSB2Classifier(base_estimator=nb, n_estimators=3,
                                   cost_positive=1, cost_negative=0.9,
                                   random_state=112)
    first = True
    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=[0, 1])
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1
    performance = correct_predictions / len(predictions)

    expected_predictions = [
        1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1,
        0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
        0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1
    ]
    expected_correct_predictions = 43
    expected_performance = 0.8775510204081632

    assert np.alltrue(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray

    expected_info = "OnlineCSB2Classifier(base_estimator=NaiveBayes(nominal_attributes=None), " \
                    "cost_negative=0.9, cost_positive=1, drift_detection=True, " \
                    "n_estimators=3, random_state=112)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info


def test_naive_bayes(test_path):
    stream = SEAGenerator(random_state=1)
    learner = NaiveBayes()
    cnt = 0
    max_samples = 5000
    y_pred = array('i')
    X_batch = []
    y_batch = []
    y_proba = []
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y, classes=[0, 1])
        cnt += 1

    expected_predictions = array('i', [
        1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1,
        0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1
    ])
    assert np.alltrue(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'data_naive_bayes_proba.npy')
    y_proba_expected = np.load(test_file)
    assert np.allclose(y_proba, y_proba_expected)

    expected_info = 'NaiveBayes(nominal_attributes=None)'
    assert learner.get_info() == expected_info

    learner.reset()
    learner.fit(X=np.array(X_batch[:4500]), y=np.array(y_batch[:4500]))

    expected_score = 0.9378757515030061
    assert np.isclose(expected_score,
                      learner.score(X=np.array(X_batch[4501:]),
                                    y=np.array(y_batch[4501:])))

    assert is_classifier(learner)

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray


def test_oza_bagging_adwin():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    knn = KNNClassifier(n_neighbors=8, leaf_size=40, max_window_size=2000)
    learner = OzaBaggingADWINClassifier(base_estimator=knn, n_estimators=3,
                                        random_state=112)
    first = True
    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=[0, 1])
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1
    performance = correct_predictions / len(predictions)

    expected_predictions = [
        1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1,
        0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1,
        0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1
    ]
    assert np.alltrue(predictions == expected_predictions)

    expected_performance = 0.8979591836734694
    assert np.isclose(expected_performance, performance)

    expected_correct_predictions = 44
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray

    expected_info = "OzaBaggingADWINClassifier(base_estimator=KNNClassifier(leaf_size=40, " \
                    "max_window_size=2000, metric='euclidean', n_neighbors=8), n_estimators=3, " \
                    "random_state=112)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info


def test_knn_adwin():
    stream = ConceptDriftStreamGenerator(stream=SEAGenerator(random_state=1),
                                         drift_stream=SEAGenerator(
                                             random_state=2,
                                             classification_function=2),
                                         random_state=1, position=250, width=10)
    learner = KNNADWINClassifier(n_neighbors=8, leaf_size=40,
                                 max_window_size=200)
    cnt = 0
    max_samples = 1000
    predictions = array('i')
    correct_predictions = 0
    wait_samples = 20

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [
        1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0,
        1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1
    ])
    assert np.alltrue(predictions == expected_predictions)

    expected_correct_predictions = 46
    assert correct_predictions == expected_correct_predictions

    learner.reset()
    assert learner.data_window.size == 0

    expected_info = "KNNADWINClassifier(leaf_size=40, max_window_size=200, " \
                    "metric='euclidean', n_neighbors=8)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray


def test_hoeffding_adaptive_tree_nb(test_path):
    stream = ConceptDriftStreamGenerator(
        stream=SEAGenerator(random_state=1, noise_percentage=0.05),
        drift_stream=SEAGenerator(random_state=2, classification_function=2,
                                  noise_percentage=0.05),
        random_state=1, position=250, width=10)

    learner = HoeffdingAdaptiveTreeClassifier(leaf_prediction='nb',
                                              random_state=1)
    cnt = 0
    max_samples = 1000
    y_pred = array('i')
    y_proba = []
    wait_samples = 20

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [
        1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
        1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1,
        1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1
    ])
    assert np.alltrue(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'test_hoeffding_adaptive_tree_nb.npy')
    data = np.load(test_file)
    assert np.allclose(y_proba, data)

    expected_info = "HoeffdingAdaptiveTreeClassifier(binary_split=False, bootstrap_sampling=True, " \
                    "grace_period=200, leaf_prediction='nb', max_byte_size=33554432, " \
                    "memory_estimate_period=1000000, nb_threshold=0, no_preprune=False, " \
                    "nominal_attributes=None, random_state=1, remove_poor_atts=False, " \
                    "split_confidence=1e-07, split_criterion='info_gain', " \
                    "stop_mem_management=False, tie_threshold=0.05)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray


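# NOTE: `get_next_n_samples` is a small test utility assumed to be available in
# this module; it is not part of the public skmultiflow API. The sketch below
# reflects the behaviour the tests rely on (draw `n` samples one at a time and
# stack them into arrays); the exact original implementation may differ.
def get_next_n_samples(stream, n):
    X_batch, y_batch = [], []
    for _ in range(n):
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
    return np.asarray(X_batch), np.asarray(y_batch)

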
def test_hoeffding_tree_model_information():
    stream = SEAGenerator(random_state=1, noise_percentage=0.05)
    X, y = get_next_n_samples(stream, 5000)

    nominal_attr_idx = [x for x in range(5, stream.n_features)]
    learner = HoeffdingTreeClassifier(nominal_attributes=nominal_attr_idx)

    learner.partial_fit(X, y, classes=[0, 1])

    expected_info = {
        'Tree size (nodes)': 5,
        'Tree size (leaves)': 3,
        'Active learning nodes': 3,
        'Tree depth': 2,
        'Active leaf byte size estimate': 0.0,
        'Inactive leaf byte size estimate': 0.0,
        'Byte size estimate overhead': 1.0
    }

    observed_info = learner.model_measurements
    for k in expected_info:
        assert k in observed_info
        assert expected_info[k] == observed_info[k]

    expected_description = "if Attribute 0 <= 4.549969620513424:\n" \
                           "  if Attribute 1 <= 5.440182925299016:\n" \
                           "    Leaf = Class 0 | {0: 345.54817975126275, 1: 44.43855503614928}\n" \
                           "  if Attribute 1 > 5.440182925299016:\n" \
                           "    Leaf = Class 1 | {0: 54.451820248737235, 1: 268.5614449638507}\n" \
                           "if Attribute 0 > 4.549969620513424:\n" \
                           "  Leaf = Class 1 | {0: 390.5845685762964, 1: 2372.3747376855454}\n"

    assert expected_description == learner.get_model_description()


def test_extremely_fast_decision_tree_coverage():
    # Cover memory management
    max_size_kb = 20
    stream = SEAGenerator(random_state=1, noise_percentage=0.05)
    X, y = get_next_n_samples(stream, 5000)

    # Unconstrained model has over 50 kB
    learner = ExtremelyFastDecisionTreeClassifier(
        leaf_prediction='mc', memory_estimate_period=200,
        max_byte_size=max_size_kb * 2**10, min_samples_reevaluate=2500)

    learner.partial_fit(X, y, classes=[0, 1])
    assert calculate_object_size(learner, 'kB') <= max_size_kb

    learner.reset()

    # Cover nominal attribute observer
    stream = RandomTreeGenerator(tree_random_state=23, sample_random_state=12,
                                 n_classes=2, n_cat_features=2,
                                 n_categories_per_cat_feature=4,
                                 n_num_features=1, max_tree_depth=30,
                                 min_leaf_depth=10,
                                 fraction_leaves_per_level=0.45)
    X, y = get_next_n_samples(stream, 5000)
    learner = ExtremelyFastDecisionTreeClassifier(
        leaf_prediction='nba', nominal_attributes=[i for i in range(1, 9)])
    learner.partial_fit(X, y, classes=[0, 1])


def test_dynamic_weighted_majority():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    learner = DynamicWeightedMajorityClassifier(3, NaiveBayes(), beta=0.5,
                                                theta=0.01)
    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0
    first = True

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=[0, 1])
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1
    performance = correct_predictions / len(predictions)

    expected_predictions = [
        1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1,
        0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1,
        0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1
    ]
    expected_correct_predictions = 44
    expected_performance = 0.8979591836734694

    assert np.alltrue(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray

    expected_info = 'DynamicWeightedMajorityClassifier(base_estimator=NaiveBayes(nominal_attributes=None),\n' \
                    '                                  beta=0.5, n_estimators=3, period=50,\n' \
                    '                                  theta=0.01)'
    assert learner.get_info() == expected_info


def test_online_rus_3():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    nb = NaiveBayes()
    learner = OnlineRUSBoostClassifier(base_estimator=nb, n_estimators=3,
                                       sampling_rate=5, algorithm=3,
                                       random_state=112)
    first = True
    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=[0, 1])
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1
    performance = correct_predictions / len(predictions)

    expected_predictions = [
        1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
        0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1
    ]
    expected_correct_predictions = 35
    expected_performance = 0.7142857142857143

    assert np.alltrue(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray


def test_sam_knn_coverage():
    stream = SEAGenerator(random_state=1)

    hyperParams = {
        'maxSize': 50,
        'n_neighbors': 3,
        'weighting': 'uniform',
        'stm_size_option': 'maxACC',
        'min_stm_size': 10,
        'use_ltm': True
    }

    learner = SAMKNNClassifier(n_neighbors=hyperParams['n_neighbors'],
                               max_window_size=hyperParams['maxSize'],
                               weighting=hyperParams['weighting'],
                               stm_size_option=hyperParams['stm_size_option'],
                               min_stm_size=hyperParams['min_stm_size'],
                               use_ltm=hyperParams['use_ltm'])

    cnt = 0
    max_samples = 1000
    predictions = array('i')
    wait_samples = 20

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [
        1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1,
        1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
        0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1
    ])
    assert np.alltrue(predictions == expected_predictions)

    expected_info = "SAMKNNClassifier(ltm_size=0.4, max_window_size=None, min_stm_size=10, " \
                    "n_neighbors=3, stm_size_option='maxACC', use_ltm=True, " \
                    "weighting='uniform')"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info


def test_additive_expert_ensemble_weakest():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    learner = AdditiveExpertEnsembleClassifier(3, NaiveBayes(), beta=0.5,
                                               gamma=0.1, pruning='weakest')
    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0
    first = True

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=[0, 1])
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1
    performance = correct_predictions / len(predictions)

    expected_predictions = [1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1,
                            0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
                            0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1]
    expected_correct_predictions = 45
    expected_performance = 0.9183673469387755

    assert np.alltrue(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray

    expected_info = "AdditiveExpertEnsembleClassifier(base_estimator=NaiveBayes(nominal_attributes=None),\n" \
                    "                                 beta=0.5, gamma=0.1, n_estimators=3,\n" \
                    "                                 pruning='weakest')"
    assert learner.get_info() == expected_info


def test_sam_knn():
    stream = SEAGenerator(random_state=1)

    hyperParams = {
        'maxSize': 1000,
        'nNeighbours': 5,
        'knnWeights': 'distance',
        'STMSizeAdaption': 'maxACCApprox',
        'use_ltm': False
    }

    learner = SAMKNNClassifier(n_neighbors=hyperParams['nNeighbours'],
                               max_window_size=hyperParams['maxSize'],
                               weighting=hyperParams['knnWeights'],
                               stm_size_option=hyperParams['STMSizeAdaption'],
                               use_ltm=hyperParams['use_ltm'])

    cnt = 0
    max_samples = 5000
    predictions = array('d')
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [
        1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1,
        0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1
    ])
    assert np.alltrue(predictions == expected_predictions)

    assert type(learner.predict(X)) == np.ndarray

    with pytest.raises(NotImplementedError):
        learner.predict_proba(X)


def test_sea_generator(test_path):
    stream = SEAGenerator(classification_function=2, random_state=112,
                          balance_classes=False, noise_percentage=0.28)

    # Load test data corresponding to first 10 instances
    test_file = os.path.join(test_path, 'sea_stream.npz')
    data = np.load(test_file)
    X_expected = data['X']
    y_expected = data['y']

    for j in range(0, 10):
        X, y = stream.next_sample()
        assert np.alltrue(np.isclose(X, X_expected[j]))
        assert np.alltrue(np.isclose(y[0], y_expected[j]))

    expected_info = "SEAGenerator(balance_classes=False, classification_function=2, " \
                    "noise_percentage=0.28, random_state=112)"
    assert stream.get_info() == expected_info


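# NOTE: `run_prequential_supervised` is a shared test utility assumed to be
# available to this module. The sketch below is a minimal prequential loop
# consistent with how it is called in test_leverage_bagging_half (pre-train on
# the first sample with `target_values`, test at the end of every `n_wait`
# window, then train); the exact original implementation may differ.
def run_prequential_supervised(stream, learner, max_samples, n_wait,
                               target_values=None, y_expected=None):
    y_pred = array('i')
    for i in range(max_samples):
        X, y = stream.next_sample()
        if i == 0 and target_values is not None:
            learner.partial_fit(X, y, classes=target_values)
            continue
        # Test the model at the end of every `n_wait` window, then train
        if (i + 1) % n_wait == 0:
            y_pred.append(learner.predict(X)[0])
        learner.partial_fit(X, y)
    if y_expected is not None:
        assert np.alltrue(np.asarray(y_pred) == y_expected)

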
def test_leverage_bagging_half():
    knn = KNNClassifier(n_neighbors=8, leaf_size=40, max_window_size=2000)
    # leveraging_bag_half
    learner = LeveragingBaggingClassifier(base_estimator=knn,
                                          n_estimators=3,
                                          random_state=112,
                                          leverage_algorithm='leveraging_bag_half')

    y_expected = np.asarray([0, 1, 1, 0, 1, 0, 1, 0, 1, 0,
                             1, 0, 0, 0, 1, 0, 1, 1, 1, 1,
                             1, 1, 1, 1, 0, 1, 0, 1, 1, 0,
                             0, 0, 1, 1, 1, 0, 1, 1, 0, 0,
                             1, 0, 0, 1, 0, 0, 0, 1, 1, 0], dtype=int)

    run_prequential_supervised(SEAGenerator(classification_function=1,
                                            noise_percentage=0.067,
                                            random_state=112),
                               learner, max_samples=2000, n_wait=40,
                               target_values=[0, 1], y_expected=y_expected)


def test_leverage_bagging_coverage():
    # Invalid leverage_algorithm
    with pytest.raises(ValueError):
        LeveragingBaggingClassifier(leverage_algorithm='invalid')

    estimator = LeveragingBaggingClassifier(random_state=4321)
    stream = SEAGenerator(random_state=4321)
    X, y = stream.next_sample()

    # classes not passed in partial_fit
    with pytest.raises(ValueError):
        estimator.partial_fit(X, y, classes=None)
    estimator.partial_fit(X, y, classes=[0, 1])
    # different observed classes
    with pytest.raises(ValueError):
        estimator.partial_fit(X, y, classes=[0, 1] + [-1])
    # Invalid leverage_algorithm, changed after initialization
    with pytest.raises(RuntimeError):
        estimator.leverage_algorithm = 'invalid'
        estimator.partial_fit(X, y, classes=[0, 1])

    # Reset ensemble
    estimator.reset()
    assert estimator.classes is None


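# NOTE: `run_classifier` is a helper assumed to be defined alongside these
# tests. The sketch below is consistent with how it is called in
# test_learn_nse: it wraps the base estimator in a LearnPPNSEClassifier (the
# constructor arguments mirror the expected_info string asserted there),
# pre-trains on one window of 250 samples, then alternates predict/partial_fit
# over eight further windows (2000 evaluated samples, matching the expected
# correct-prediction counts). The window size and count are assumptions.
def run_classifier(estimator, stream, pruning=None, ensemble_size=15):
    classifier = LearnPPNSEClassifier(base_estimator=copy.deepcopy(estimator),
                                      window_size=250, slope=0.5,
                                      crossing_point=10,
                                      n_estimators=ensemble_size,
                                      pruning=pruning)
    m = 250
    corrects, sample_count = 0, 0

    # Pre-train the classifier on the first window
    X, y = get_next_n_samples(stream, m)
    classifier.partial_fit(X, y, classes=[0, 1])

    # Prequential evaluation: predict on each window, then train on it
    for _ in range(8):
        X, y = get_next_n_samples(stream, m)
        pred = classifier.predict(X)
        classifier.partial_fit(X, y)
        corrects += np.sum(y == pred)
        sample_count += m

    return corrects, corrects / sample_count, classifier

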
def test_learn_nse():
    stream = SEAGenerator(random_state=2212)
    estimator = GaussianNB()
    corrects, acc, classifier = run_classifier(estimator, stream)

    expected_correct_predictions = 1754
    expected_acc = 0.877
    assert np.isclose(expected_acc, acc)
    assert corrects == expected_correct_predictions

    # Test reset method
    classifier.reset()
    assert len(classifier.ensemble) == 0
    assert len(classifier.ensemble_weights) == 0
    assert len(classifier.bkts) == 0
    assert len(classifier.wkts) == 0
    assert len(classifier.X_batch) == 0
    assert len(classifier.y_batch) == 0

    expected_info = 'LearnPPNSEClassifier(base_estimator=GaussianNB(), crossing_point=10, ' \
                    'n_estimators=15, pruning=None, slope=0.5, window_size=250)'
    info = " ".join([line.strip() for line in classifier.get_info().split()])
    assert info == expected_info

    # test pruning error
    corrects, acc, classifier = run_classifier(estimator, stream,
                                               pruning="error", ensemble_size=5)
    expected_correct_predictions = 1751
    expected_acc = 0.8755
    assert np.isclose(expected_acc, acc)
    assert corrects == expected_correct_predictions

    # test pruning age
    corrects, acc, classifier = run_classifier(estimator, stream,
                                               pruning="age", ensemble_size=5)
    expected_correct_predictions = 1774
    expected_acc = 0.887
    assert np.isclose(expected_acc, acc)
    assert corrects == expected_correct_predictions

    stream = SEAGenerator(random_state=2212)
    estimator = HoeffdingTreeClassifier()
    classifier = LearnPPNSEClassifier(base_estimator=estimator)

    # Keeping track of sample count and correct prediction count
    sample_count = 0
    corrects = 0
    m = 250

    # Pre training the classifier
    X, y = get_next_n_samples(stream, m)
    classifier.partial_fit(X, y, classes=[0, 1])

    for i in range(10):
        X, y = get_next_n_samples(stream, m)
        pred = classifier.predict(X)
        classifier.partial_fit(X, y)
        if pred is not None:
            corrects += np.sum(y == pred)
        sample_count += m

    acc = corrects / sample_count
    expected_acc = 0.9436
    assert acc == expected_acc


def test_knn():
    stream = SEAGenerator(random_state=1)
    learner = KNNClassifier(n_neighbors=8, max_window_size=2000, leaf_size=40)
    cnt = 0
    max_samples = 5000
    predictions = array('i')
    correct_predictions = 0
    wait_samples = 100
    X_batch = []
    y_batch = []

    while cnt < max_samples:
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [
        1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1,
        0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1
    ])
    assert np.alltrue(predictions == expected_predictions)

    expected_correct_predictions = 49
    assert correct_predictions == expected_correct_predictions

    expected_info = "KNNClassifier(leaf_size=40, max_window_size=2000, " \
                    "metric='euclidean', n_neighbors=8)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info

    learner.reset()
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info

    X_batch = np.array(X_batch)
    y_batch = np.array(y_batch)
    learner.fit(X_batch[:4500], y_batch[:4500], classes=[0, 1])
    predictions = learner.predict(X_batch[4501:4550])

    expected_predictions = array('i', [
        1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1,
        1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1,
        0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0
    ])
    assert np.alltrue(predictions == expected_predictions)

    correct_predictions = sum(predictions == y_batch[4501:4550])
    expected_correct_predictions = 49
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray


def test_rslvq():
    stream = SEAGenerator(random_state=1)

    learner_adadelta = RSLVQ(gradient_descent='adadelta')
    learner_vanilla = RSLVQ(gradient_descent='vanilla')

    cnt = 0
    max_samples = 5000
    y_pred_vanilla = array('i')
    y_pred_adadelta = array('i')
    X_batch = []
    y_batch = []
    wait_samples = 100

    # Check if predicted labels are as expected
    while cnt < max_samples:
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred_vanilla.append(learner_vanilla.predict(X)[0])
            y_pred_adadelta.append(learner_adadelta.predict(X)[0])
        learner_adadelta.partial_fit(X, y, classes=[0, 1])
        learner_vanilla.partial_fit(X, y, classes=[0, 1])
        cnt += 1

    expected_predictions_vanilla = array('i', [
        1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1,
        1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
        1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1
    ])
    expected_predictions_adadelta = array('i', [
        1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1,
        1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1
    ])
    assert np.alltrue(y_pred_vanilla == expected_predictions_vanilla)
    assert np.alltrue(y_pred_adadelta == expected_predictions_adadelta)

    # Check get_info method
    expected_info = "RobustSoftLearningVectorQuantization(gamma=0.9, gradient_descent='vanilla',\n" \
                    "                                     initial_prototypes=None,\n" \
                    "                                     prototypes_per_class=1, random_state=None,\n" \
                    "                                     sigma=1.0)"
    assert learner_vanilla.get_info() == expected_info

    # Check reset method
    learner_vanilla.reset()
    learner_vanilla.fit(X=np.array(X_batch[:4500]), y=np.array(y_batch[:4500]))
    learner_adadelta.reset()
    learner_adadelta.fit(X=np.array(X_batch[:4500]), y=np.array(y_batch[:4500]))

    # Check classifiers performance
    learner_w_init_ppt = RSLVQ(
        initial_prototypes=[[2.59922826, 2.57368134, 4.92501, 0],
                            [6.05801971, 6.01383352, 5.02135783, 1]],
        gradient_descent='adadelta')
    learner_w_init_ppt.fit(X=np.array(X_batch[:4500]),
                           y=np.array(y_batch[:4500]))
    expected_score_ppt = .9539078156312625
    assert np.isclose(expected_score_ppt,
                      learner_w_init_ppt.score(X=np.array(X_batch[4501:]),
                                               y=np.array(y_batch[4501:])))

    expected_score_vanilla = .8897795591182365
    assert np.isclose(expected_score_vanilla,
                      learner_vanilla.score(X=np.array(X_batch[4501:]),
                                            y=np.array(y_batch[4501:])))

    expected_score_adadelta = .9458917835671342
    assert np.isclose(expected_score_adadelta,
                      learner_adadelta.score(X=np.array(X_batch[4501:]),
                                             y=np.array(y_batch[4501:])))

    # Check types
    assert is_classifier(learner_vanilla)
    assert is_classifier(learner_adadelta)
    assert type(learner_vanilla.predict(X)) == np.ndarray
    assert type(learner_adadelta.predict(X)) == np.ndarray

    # Check properties after learning
    expected_prototypes = np.array([[2.59922826, 2.57368134, 4.92501],
                                    [6.05801971, 6.01383352, 5.02135783]])
    assert np.allclose(learner_adadelta.prototypes, expected_prototypes)

    expected_prototypes_classes = np.array([0, 1])
    assert np.allclose(learner_adadelta.prototypes_classes,
                       expected_prototypes_classes)

    expected_class_labels = np.array([0, 1])
    assert np.allclose(learner_adadelta.class_labels, expected_class_labels)