def test_hat_mc(test_path):
    """Check HAT(leaf_prediction='mc') against stored reference results.

    The learner is trained one sample at a time on a ConceptDriftStream made
    of two SEAGenerator streams (drift at position=250, width=10). Every
    ``wait_samples`` samples the current prediction and class probabilities
    are recorded and later compared with the expected values.

    Parameters
    ----------
    test_path : str
        Directory holding ``test_hoeffding_adaptive_tree_mc.npy``
        (presumably supplied by a pytest fixture -- confirm).
    """
    stream = ConceptDriftStream(
        stream=SEAGenerator(random_state=1, noise_percentage=0.05),
        drift_stream=SEAGenerator(random_state=2,
                                  classification_function=2,
                                  noise_percentage=0.05),
        random_state=1, position=250, width=10)
    stream.prepare_for_use()

    learner = HAT(leaf_prediction='mc')

    cnt = 0
    max_samples = 1000
    y_pred = array('i')
    y_proba = []
    wait_samples = 20

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples (never on the very first one, when the
        # learner has not seen any data yet).
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [
        1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
        0, 1, 1, 1, 1, 0, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
        1, 1, 1, 0, 1, 0, 1, 0, 1,
    ])
    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'test_hoeffding_adaptive_tree_mc.npy')
    data = np.load(test_file)
    assert np.allclose(y_proba, data)

    expected_info = (
        'HAT: max_byte_size: 33554432 - memory_estimate_period: 1000000 - grace_period: 200'
        ' - split_criterion: info_gain - split_confidence: 1e-07 - tie_threshold: 0.05'
        ' - binary_split: False - stop_mem_management: False - remove_poor_atts: False'
        ' - no_pre_prune: False - leaf_prediction: mc - nb_threshold: 0'
        ' - nominal_attributes: [] - '
    )
    assert learner.get_info() == expected_info

    # Three accepted renderings of the same leaf: they differ only in the
    # printed float precision and in the order of the class entries.
    expected_model_1 = 'Leaf = Class 1.0 | {0.0: 0.005295278636481529, 1.0: 1.9947047213635185}\n'
    expected_model_2 = 'Leaf = Class 1.0 | {0.0: 0.0052952786364815294, 1.0: 1.9947047213635185}\n'
    expected_model_3 = 'Leaf = Class 1.0 | {1.0: 1.9947047213635185, 0.0: 0.0052952786364815294}\n'
    model_description = learner.get_model_description()
    assert model_description in (expected_model_1,
                                 expected_model_2,
                                 expected_model_3)

    # Smoke test: batch training with a very small max_byte_size must run
    # without raising (presumably exercises the memory-limit handling --
    # confirm).
    stream.restart()
    X, y = stream.next_sample(5000)

    learner = HAT(max_byte_size=30, leaf_prediction='mc', grace_period=10)
    learner.partial_fit(X, y)
def test_hat_mc(test_path):
    """Check HAT(leaf_prediction='mc') against stored reference results.

    The learner is trained one sample at a time on a ConceptDriftStream made
    of two SEAGenerator streams (drift at position=250, width=10). Every
    ``wait_samples`` samples the current prediction and class probabilities
    are recorded and later compared with the expected values.

    Parameters
    ----------
    test_path : str
        Directory holding ``test_hoeffding_adaptive_tree_mc.npy``
        (presumably supplied by a pytest fixture -- confirm).
    """
    stream = ConceptDriftStream(
        stream=SEAGenerator(random_state=1, noise_percentage=0.05),
        drift_stream=SEAGenerator(random_state=2,
                                  classification_function=2,
                                  noise_percentage=0.05),
        random_state=1, position=250, width=10)
    stream.prepare_for_use()

    learner = HAT(leaf_prediction='mc')

    cnt = 0
    max_samples = 1000
    y_pred = array('i')
    y_proba = []
    wait_samples = 20

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples (never on the very first one, when the
        # learner has not seen any data yet).
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    # One prediction is recorded at cnt = 20, 40, ..., 980 (49 in total) and
    # every one of them is expected to be class 1.
    expected_predictions = array('i', [1] * 49)
    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'test_hoeffding_adaptive_tree_mc.npy')
    data = np.load(test_file)
    assert np.allclose(y_proba, data)

    expected_info = (
        "HAT(binary_split=False, grace_period=200, leaf_prediction='mc',\n"
        " max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0,\n"
        " no_preprune=False, nominal_attributes=None, remove_poor_atts=False,\n"
        " split_confidence=1e-07, split_criterion='info_gain',\n"
        " stop_mem_management=False, tie_threshold=0.05)"
    )
    # Compare with all whitespace runs collapsed so that the line wrapping
    # and continuation indent of the multi-line repr cannot break the test.
    assert " ".join(learner.get_info().split()) == " ".join(expected_info.split())

    expected_model_1 = 'Leaf = Class 1.0 | {0.0: 398.0, 1.0: 1000.0}\n'
    assert learner.get_model_description() == expected_model_1

    # X still holds the last sample drawn in the loop above.
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)

    # Smoke test: batch training with a very small max_byte_size must run
    # without raising (presumably exercises the memory-limit handling --
    # confirm).
    stream.restart()
    X, y = stream.next_sample(5000)

    learner = HAT(max_byte_size=30, leaf_prediction='mc', grace_period=10)
    learner.partial_fit(X, y)
def test_concept_drift_stream(test_path):
    """Check the metadata and generated samples of a default ConceptDriftStream.

    Parameters
    ----------
    test_path : str
        Directory holding ``concept_drift_stream.npz`` with the expected
        ``X`` and ``y`` arrays (presumably supplied by a pytest fixture --
        confirm).
    """
    stream = ConceptDriftStream(random_state=1, position=20, width=5)
    stream.prepare_for_use()

    assert stream.n_remaining_samples() == -1

    expected_names = [
        "salary", "commission", "age", "elevel", "car",
        "zipcode", "hvalue", "hyears", "loan",
    ]
    assert stream.feature_names == expected_names

    expected_targets = [0, 1]
    assert stream.target_values == expected_targets

    assert stream.target_names == ['target']

    assert stream.n_features == 9
    assert stream.n_cat_features == 3
    assert stream.n_num_features == 6
    assert stream.n_targets == 1

    assert stream.get_info() == ('ConceptDriftStream: '
                                 'First Stream: AGRAWALGenerator - '
                                 'Drift Stream: AGRAWALGenerator - '
                                 'alpha: 0.0 - position: 20 - width: 5')

    assert stream.has_more_samples() is True
    assert stream.is_restartable() is True

    # Load the expected samples; they are compared with the first 30
    # generated instances below. The context manager closes the .npz archive
    # once both arrays have been read.
    test_file = os.path.join(test_path, 'concept_drift_stream.npz')
    with np.load(test_file) as data:
        X_expected = data['X']
        y_expected = data['y']

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    X, y = stream.next_sample()
    assert np.all(X[0] == X_expected[0])
    assert np.all(y[0] == y_expected[0])

    # last_sample() must return the sample that was just drawn.
    X, y = stream.last_sample()
    assert np.all(X[0] == X_expected[0])
    assert np.all(y[0] == y_expected[0])

    stream.restart()
    X, y = stream.next_sample(30)
    assert np.all(X == X_expected)
    assert np.all(y == y_expected)

    assert stream.n_targets == np.array(y).ndim
    assert stream.n_features == X.shape[1]

    assert 'stream' == stream.get_class_type()
def test_knn_adwin():
    """Check KNNADWINClassifier predictions on a SEA stream with concept drift.

    The learner is first trained one sample at a time, with a prediction
    recorded every ``wait_samples`` samples, and then re-trained in batch
    mode after a stream restart.
    """
    stream = ConceptDriftStream(
        stream=SEAGenerator(random_state=1),
        drift_stream=SEAGenerator(random_state=2, classification_function=2),
        random_state=1, position=250, width=10)

    learner = KNNADWINClassifier(n_neighbors=8, leaf_size=40, max_window_size=200)

    cnt = 0
    max_samples = 1000
    predictions = array('i')
    correct_predictions = 0
    wait_samples = 20

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples (never on the very first one, when the
        # learner has not seen any data yet).
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [
        1, 0, 1, 1, 1, 1, 0, 1, 1, 1,
        0, 1, 1, 0, 1, 1, 0, 1, 0, 1,
        1, 1, 1, 0, 1, 0, 0, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        0, 1, 1, 1, 0, 1, 0, 1, 1,
    ])
    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(predictions == expected_predictions)

    expected_correct_predictions = 46
    assert correct_predictions == expected_correct_predictions

    # After reset() the stored sample window must be empty.
    learner.reset()
    assert learner.data_window.size == 0

    expected_info = ("KNNADWINClassifier(leaf_size=40, max_window_size=200, "
                     "metric='euclidean', n_neighbors=8)")
    # Collapse every whitespace run so the repr's line wrapping is ignored.
    info = " ".join(learner.get_info().split())
    assert info == expected_info

    stream.restart()

    X, y = stream.next_sample(max_samples)
    learner.fit(X[:950], y[:950])
    # NOTE(review): the sample at index 950 is used for neither training nor
    # testing (train is [:950], test is [951:]). Left as is, because changing
    # the split would presumably require a new expected count -- confirm.
    predictions = learner.predict(X[951:])

    correct_predictions = sum(np.array(predictions) == y[951:])
    expected_correct_predictions = 47
    assert correct_predictions == expected_correct_predictions

    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)
def test_knn_adwin():
    """Check KNNAdwin predictions on a SEA stream with concept drift.

    The learner is first trained one sample at a time, with a prediction
    recorded every ``wait_samples`` samples, and then re-trained in batch
    mode after a stream restart.
    """
    stream = ConceptDriftStream(
        stream=SEAGenerator(random_state=1),
        drift_stream=SEAGenerator(random_state=2, classification_function=2),
        random_state=1, position=250, width=10)
    stream.prepare_for_use()

    learner = KNNAdwin(n_neighbors=8, leaf_size=40, max_window_size=200)

    cnt = 0
    max_samples = 1000
    predictions = array('i')
    correct_predictions = 0
    wait_samples = 20

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples (never on the very first one, when the
        # learner has not seen any data yet).
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [
        1, 0, 1, 1, 1, 1, 0, 1, 1, 1,
        0, 1, 1, 0, 1, 1, 0, 1, 0, 1,
        1, 1, 1, 0, 1, 0, 0, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        0, 1, 1, 1, 0, 1, 0, 1, 1,
    ])
    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(predictions == expected_predictions)

    expected_correct_predictions = 46
    assert correct_predictions == expected_correct_predictions

    # After reset() the stored sample window must be empty.
    learner.reset()
    assert learner.window.n_samples == 0

    expected_info = ('KNNAdwin(leaf_size=40, max_window_size=200, n_neighbors=8,\n'
                     ' nominal_attributes=None)')
    # Compare with all whitespace runs collapsed so that the line wrapping
    # and continuation indent of the multi-line repr cannot break the test.
    assert " ".join(learner.get_info().split()) == " ".join(expected_info.split())

    stream.restart()

    X, y = stream.next_sample(max_samples)
    learner.fit(X[:950], y[:950])
    # NOTE(review): the sample at index 950 is used for neither training nor
    # testing (train is [:950], test is [951:]). Left as is, because changing
    # the split would presumably require a new expected count -- confirm.
    predictions = learner.predict(X[951:])

    correct_predictions = sum(np.array(predictions) == y[951:])
    expected_correct_predictions = 47
    assert correct_predictions == expected_correct_predictions

    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)
def test_hat_nb(test_path):
    """Check HAT(leaf_prediction='nb') against stored reference results.

    The learner is trained one sample at a time on a ConceptDriftStream made
    of two SEAGenerator streams (drift at position=250, width=10). Every
    ``wait_samples`` samples the current prediction and class probabilities
    are recorded and later compared with the expected values.

    Parameters
    ----------
    test_path : str
        Directory holding ``test_hoeffding_adaptive_tree_nb.npy``
        (presumably supplied by a pytest fixture -- confirm).
    """
    stream = ConceptDriftStream(
        stream=SEAGenerator(random_state=1, noise_percentage=0.05),
        drift_stream=SEAGenerator(random_state=2,
                                  classification_function=2,
                                  noise_percentage=0.05),
        random_state=1, position=250, width=10)
    stream.prepare_for_use()

    learner = HAT(leaf_prediction='nb')

    cnt = 0
    max_samples = 1000
    y_pred = array('i')
    y_proba = []
    wait_samples = 20

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples (never on the very first one, when the
        # learner has not seen any data yet).
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [
        1, 0, 1, 1, 1, 1, 0, 1, 1, 1,
        0, 1, 1, 1, 1, 1, 0, 1, 0, 1,
        1, 1, 1, 0, 1, 0, 0, 1, 1, 0,
        1, 1, 1, 1, 1, 0, 1, 0, 1, 1,
        0, 1, 1, 1, 1, 1, 0, 1, 1,
    ])
    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'test_hoeffding_adaptive_tree_nb.npy')
    data = np.load(test_file)
    assert np.allclose(y_proba, data)

    expected_info = (
        'HAT: max_byte_size: 33554432 - memory_estimate_period: 1000000 - grace_period: 200'
        ' - split_criterion: info_gain - split_confidence: 1e-07 - tie_threshold: 0.05'
        ' - binary_split: False - stop_mem_management: False - remove_poor_atts: False'
        ' - no_pre_prune: False - leaf_prediction: nb - nb_threshold: 0'
        ' - nominal_attributes: [] - '
    )
    assert learner.get_info() == expected_info

    # X still holds the last sample drawn in the loop above.
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)
def test_hoeffding_adaptive_tree_nb(test_path):
    """Check HoeffdingAdaptiveTreeClassifier(leaf_prediction='nb') results.

    The learner is trained one sample at a time on a ConceptDriftStream made
    of two SEAGenerator streams (drift at position=250, width=10). Every
    ``wait_samples`` samples the current prediction and class probabilities
    are recorded and later compared with the expected values.

    Parameters
    ----------
    test_path : str
        Directory holding ``test_hoeffding_adaptive_tree_nb.npy``
        (presumably supplied by a pytest fixture -- confirm).
    """
    stream = ConceptDriftStream(
        stream=SEAGenerator(random_state=1, noise_percentage=0.05),
        drift_stream=SEAGenerator(random_state=2,
                                  classification_function=2,
                                  noise_percentage=0.05),
        random_state=1, position=250, width=10)

    learner = HoeffdingAdaptiveTreeClassifier(leaf_prediction='nb', random_state=1)

    cnt = 0
    max_samples = 1000
    y_pred = array('i')
    y_proba = []
    wait_samples = 20

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples (never on the very first one, when the
        # learner has not seen any data yet).
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [
        1, 0, 1, 1, 1, 1, 0, 1, 1, 1,
        0, 1, 1, 1, 1, 1, 0, 1, 0, 1,
        1, 1, 1, 0, 1, 0, 0, 1, 1, 0,
        1, 1, 1, 1, 1, 0, 1, 0, 1, 1,
        0, 1, 1, 1, 1, 1, 0, 1, 1,
    ])
    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'test_hoeffding_adaptive_tree_nb.npy')
    data = np.load(test_file)
    assert np.allclose(y_proba, data)

    expected_info = (
        "HoeffdingAdaptiveTreeClassifier(binary_split=False, bootstrap_sampling=True, grace_period=200, "
        "leaf_prediction='nb', max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0, "
        "no_preprune=False, nominal_attributes=None, random_state=1, remove_poor_atts=False, "
        "split_confidence=1e-07, split_criterion='info_gain', stop_mem_management=False, tie_threshold=0.05)"
    )
    # Collapse every whitespace run so the repr's line wrapping is ignored.
    info = " ".join(learner.get_info().split())
    assert info == expected_info

    # X still holds the last sample drawn in the loop above.
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)
def test_concept_drift_stream(test_path):
    """Check the metadata and generated samples of a default ConceptDriftStream.

    Parameters
    ----------
    test_path : str
        Directory holding ``concept_drift_stream.npz`` with the expected
        ``X`` and ``y`` arrays (presumably supplied by a pytest fixture --
        confirm).
    """
    stream = ConceptDriftStream(random_state=1, position=20, width=5)

    assert stream.n_remaining_samples() == -1

    expected_names = [
        "salary", "commission", "age", "elevel", "car",
        "zipcode", "hvalue", "hyears", "loan",
    ]
    assert stream.feature_names == expected_names

    expected_targets = [0, 1]
    assert stream.target_values == expected_targets

    assert stream.target_names == ['target']

    assert stream.n_features == 9
    assert stream.n_cat_features == 3
    assert stream.n_num_features == 6
    assert stream.n_targets == 1

    assert stream.has_more_samples() is True
    assert stream.is_restartable() is True

    # Load the expected samples; they are compared with the first 30
    # generated instances below. The context manager closes the .npz archive
    # once both arrays have been read.
    test_file = os.path.join(test_path, 'concept_drift_stream.npz')
    with np.load(test_file) as data:
        X_expected = data['X']
        y_expected = data['y']

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    X, y = stream.next_sample()
    assert np.all(X[0] == X_expected[0])
    assert np.all(y[0] == y_expected[0])

    # last_sample() must return the sample that was just drawn.
    X, y = stream.last_sample()
    assert np.all(X[0] == X_expected[0])
    assert np.all(y[0] == y_expected[0])

    stream.restart()
    X, y = stream.next_sample(30)
    assert np.all(X == X_expected)
    assert np.all(y == y_expected)

    assert stream.n_targets == np.array(y).ndim
    assert stream.n_features == X.shape[1]

    assert 'stream' == stream._estimator_type

    expected_info = (
        "ConceptDriftStream(alpha=0.0,\n"
        " drift_stream=AGRAWALGenerator(balance_classes=False,\n"
        " classification_function=2,\n"
        " perturbation=0.0,\n"
        " random_state=112),\n"
        " position=20, random_state=1,\n"
        " stream=AGRAWALGenerator(balance_classes=False,\n"
        " classification_function=0,\n"
        " perturbation=0.0, random_state=112),\n"
        " width=5)"
    )
    # Compare with all whitespace runs collapsed so that the line wrapping
    # and continuation indent of the nested repr cannot break the test.
    assert " ".join(stream.get_info().split()) == " ".join(expected_info.split())
window_size=WINDOW_SIZE, logging=False) goowe_2.prepare_post_analysis_req(num_features, num_targets, num_classes, target_values) goowe_3 = GooweMS(goowe_1, goowe_2, n_max_components=N_MAX_CLASSIFIERS, chunk_size=CHUNK_SIZE, window_size=WINDOW_SIZE, logging=False) goowe_3.prepare_post_analysis_req(num_features, num_targets, num_classes, target_values) # For the first chunk, there is no prediction. X_init, y_init = stream_1.next_sample(CHUNK_SIZE) goowe_1.partial_fit(X_init, y_init) X_init, y_init = stream_2.next_sample(CHUNK_SIZE) goowe_2.partial_fit(X_init, y_init) X_init, y_init = stream_3.next_sample(CHUNK_SIZE) #a = goowe_3.predict(X_init) #print('==============', a) goowe_3.update(X_init, y_init, 1, 1) # TODO: update_from(goowe_1, goowe_2) :: updates existing goowe by selecting N_MAX_CLASSIFIERS / 2 components from each of them. accuracy_1 = 0.0 total_1 = 0.0 true_predictions_1 = 0.0
# Imports
import matplotlib.pyplot as plt
import numpy as np
from skmultiflow.data import ConceptDriftStream
from skmultiflow.data.sine_generator import SineGenerator

# Setting up the stream
# Alternative stream, kept for reference:
# stream = SineGenerator(classification_function = 2, random_state = 112,
#                        balance_classes = False, has_noise = False)
stream = ConceptDriftStream(random_state=123456, position=25000)

# Retrieving a batch of 100 samples; a[0] holds the features and a[1] the
# targets.
# stream.generate_drift()
a = stream.next_sample(100)

# Flatten the feature values into a plain list of Python scalars
# (same order as indexing a[0].item(i) for every flat index i).
b = a[0].ravel().tolist()
plt.plot(b)

# fig= plt.gcf()
# fig.set_size_inches(20, 5.5)
# plt.ylabel('value')
# plt.xlabel('Time')
# plt.show()

# b=[]
# for i in range(a[1].size):
#     b.append(a[1].item(i))
#
# plt.plot(b,color='r', linestyle='--',linewidth=0.3)
fig = plt.gcf()
fig.set_size_inches(20, 5.5)