def test_hat_mc(test_path):
    """Regression test for HAT with majority-class ('mc') leaf prediction.

    Runs a test-then-train loop over 1000 samples of a ConceptDriftStream
    built from two noisy SEA generators. Every 20th sample (except the very
    first) a prediction and its class probabilities are recorded before the
    learner is updated. The recorded values, the learner's info string and
    its model description are compared against stored references.

    Parameters
    ----------
    test_path : str
        Directory holding 'test_hoeffding_adaptive_tree_mc.npy', the
        reference class probabilities.
    """
    stream = ConceptDriftStream(
        stream=SEAGenerator(random_state=1, noise_percentage=0.05),
        drift_stream=SEAGenerator(random_state=2, classification_function=2, noise_percentage=0.05),
        random_state=1, position=250, width=10)
    stream.prepare_for_use()

    learner = HAT(leaf_prediction='mc')

    max_samples = 1000
    wait_samples = 20
    y_pred = array('i')
    y_proba = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples (predict first, then train on the same sample)
        if cnt != 0 and cnt % wait_samples == 0:
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [
        1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
        0, 1, 1, 1, 1, 0, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
        1, 1, 1, 0, 1, 0, 1, 0, 1
    ])
    # np.alltrue was removed in NumPy 2.0; np.all is the supported spelling.
    assert np.all(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'test_hoeffding_adaptive_tree_mc.npy')
    data = np.load(test_file)
    assert np.allclose(y_proba, data)

    expected_info = ('HAT: max_byte_size: 33554432 - memory_estimate_period: 1000000 - grace_period: 200'
                     ' - split_criterion: info_gain - split_confidence: 1e-07 - tie_threshold: 0.05'
                     ' - binary_split: False - stop_mem_management: False - remove_poor_atts: False'
                     ' - no_pre_prune: False - leaf_prediction: mc - nb_threshold: 0'
                     ' - nominal_attributes: [] - ')
    assert learner.get_info() == expected_info

    # Three accepted renderings: they differ in float repr and in dict key order.
    expected_model_1 = 'Leaf = Class 1.0 | {0.0: 0.005295278636481529, 1.0: 1.9947047213635185}\n'
    expected_model_2 = 'Leaf = Class 1.0 | {0.0: 0.0052952786364815294, 1.0: 1.9947047213635185}\n'
    expected_model_3 = 'Leaf = Class 1.0 | {1.0: 1.9947047213635185, 0.0: 0.0052952786364815294}\n'
    # Fetch the description once rather than once per alternative.
    assert learner.get_model_description() in (expected_model_1, expected_model_2, expected_model_3)

    stream.restart()
    X, y = stream.next_sample(5000)

    # Smoke test: a fresh learner with a very small max_byte_size must still
    # accept a 5000-sample batch without raising.
    learner = HAT(max_byte_size=30, leaf_prediction='mc', grace_period=10)
    learner.partial_fit(X, y)
def test_hat_mc(test_path):
    """Regression test for HAT with majority-class ('mc') leaf prediction.

    Runs a test-then-train loop over 1000 samples of a ConceptDriftStream
    built from two noisy SEA generators. Every 20th sample (except the very
    first) a prediction and its class probabilities are recorded before the
    learner is updated. The recorded values, the learner's info string, its
    model description and the return types of predict / predict_proba are
    checked.

    Parameters
    ----------
    test_path : str
        Directory holding 'test_hoeffding_adaptive_tree_mc.npy', the
        reference class probabilities.
    """
    stream = ConceptDriftStream(
        stream=SEAGenerator(random_state=1, noise_percentage=0.05),
        drift_stream=SEAGenerator(random_state=2, classification_function=2, noise_percentage=0.05),
        random_state=1, position=250, width=10)
    stream.prepare_for_use()

    learner = HAT(leaf_prediction='mc')

    max_samples = 1000
    wait_samples = 20
    y_pred = array('i')
    y_proba = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples (predict first, then train on the same sample)
        if cnt != 0 and cnt % wait_samples == 0:
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1
    ])
    # np.alltrue was removed in NumPy 2.0; np.all is the supported spelling.
    assert np.all(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'test_hoeffding_adaptive_tree_mc.npy')
    data = np.load(test_file)
    assert np.allclose(y_proba, data)

    expected_info = ("HAT(binary_split=False, grace_period=200, leaf_prediction='mc',\n"
                     " max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0,\n"
                     " no_preprune=False, nominal_attributes=None, remove_poor_atts=False,\n"
                     " split_confidence=1e-07, split_criterion='info_gain',\n"
                     " stop_mem_management=False, tie_threshold=0.05)")
    assert learner.get_info() == expected_info

    expected_model_1 = 'Leaf = Class 1.0 | {0.0: 398.0, 1.0: 1000.0}\n'
    assert learner.get_model_description() == expected_model_1

    # X is still the last sample drawn in the loop above.
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)

    stream.restart()
    X, y = stream.next_sample(5000)

    # Smoke test: a fresh learner with a very small max_byte_size must still
    # accept a 5000-sample batch without raising.
    learner = HAT(max_byte_size=30, leaf_prediction='mc', grace_period=10)
    learner.partial_fit(X, y)
def test_hat_nb(test_path):
    """Regression test for HAT with naive-Bayes ('nb') leaf prediction.

    Runs a test-then-train loop over 1000 samples of a ConceptDriftStream
    built from two noisy SEA generators. Every 20th sample (except the very
    first) a prediction and its class probabilities are recorded before the
    learner is updated. The recorded values, the learner's info string and
    the return types of predict / predict_proba are checked.

    Parameters
    ----------
    test_path : str
        Directory holding 'test_hoeffding_adaptive_tree_nb.npy', the
        reference class probabilities.
    """
    stream = ConceptDriftStream(
        stream=SEAGenerator(random_state=1, noise_percentage=0.05),
        drift_stream=SEAGenerator(random_state=2, classification_function=2, noise_percentage=0.05),
        random_state=1, position=250, width=10)
    stream.prepare_for_use()

    learner = HAT(leaf_prediction='nb')

    max_samples = 1000
    wait_samples = 20
    y_pred = array('i')
    y_proba = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples (predict first, then train on the same sample)
        if cnt != 0 and cnt % wait_samples == 0:
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [
        1, 0, 1, 1, 1, 1, 0, 1, 1, 1,
        0, 1, 1, 1, 1, 1, 0, 1, 0, 1,
        1, 1, 1, 0, 1, 0, 0, 1, 1, 0,
        1, 1, 1, 1, 1, 0, 1, 0, 1, 1,
        0, 1, 1, 1, 1, 1, 0, 1, 1
    ])
    # np.alltrue was removed in NumPy 2.0; np.all is the supported spelling.
    assert np.all(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'test_hoeffding_adaptive_tree_nb.npy')
    data = np.load(test_file)
    assert np.allclose(y_proba, data)

    expected_info = ('HAT: max_byte_size: 33554432 - memory_estimate_period: 1000000 - grace_period: 200'
                     ' - split_criterion: info_gain - split_confidence: 1e-07 - tie_threshold: 0.05'
                     ' - binary_split: False - stop_mem_management: False - remove_poor_atts: False'
                     ' - no_pre_prune: False - leaf_prediction: nb - nb_threshold: 0'
                     ' - nominal_attributes: [] - ')
    assert learner.get_info() == expected_info

    # X is still the last sample drawn in the loop above.
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)
def test_hoeffding_adaptive_tree_nb(test_path):
    """Regression test for HoeffdingAdaptiveTreeClassifier with 'nb' leaves.

    Runs a test-then-train loop over 1000 samples of a ConceptDriftStream
    built from two noisy SEA generators. Every 20th sample (except the very
    first) a prediction and its class probabilities are recorded before the
    learner is updated. The recorded values, the whitespace-normalised info
    string and the return types of predict / predict_proba are checked.

    Parameters
    ----------
    test_path : str
        Directory holding 'test_hoeffding_adaptive_tree_nb.npy', the
        reference class probabilities.
    """
    stream = ConceptDriftStream(
        stream=SEAGenerator(random_state=1, noise_percentage=0.05),
        drift_stream=SEAGenerator(random_state=2, classification_function=2, noise_percentage=0.05),
        random_state=1, position=250, width=10)

    learner = HoeffdingAdaptiveTreeClassifier(leaf_prediction='nb', random_state=1)

    max_samples = 1000
    wait_samples = 20
    y_pred = array('i')
    y_proba = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples (predict first, then train on the same sample)
        if cnt != 0 and cnt % wait_samples == 0:
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [
        1, 0, 1, 1, 1, 1, 0, 1, 1, 1,
        0, 1, 1, 1, 1, 1, 0, 1, 0, 1,
        1, 1, 1, 0, 1, 0, 0, 1, 1, 0,
        1, 1, 1, 1, 1, 0, 1, 0, 1, 1,
        0, 1, 1, 1, 1, 1, 0, 1, 1
    ])
    # np.alltrue was removed in NumPy 2.0; np.all is the supported spelling.
    assert np.all(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'test_hoeffding_adaptive_tree_nb.npy')
    data = np.load(test_file)
    assert np.allclose(y_proba, data)

    expected_info = ("HoeffdingAdaptiveTreeClassifier(binary_split=False, bootstrap_sampling=True, grace_period=200, "
                     "leaf_prediction='nb', max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0, "
                     "no_preprune=False, nominal_attributes=None, random_state=1, remove_poor_atts=False, "
                     "split_confidence=1e-07, split_criterion='info_gain', stop_mem_management=False, tie_threshold=0.05)")
    # Collapse all whitespace runs (including line breaks) to single spaces;
    # str.split() already yields stripped tokens, so no extra strip is needed.
    info = " ".join(learner.get_info().split())
    assert info == expected_info

    # X is still the last sample drawn in the loop above.
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)
def next_sample(self, batch_size=1):
    """Return the next sample and advance the gradual-drift state machine.

    Parameters
    ----------
    batch_size : int
        Only 1 is supported. For larger values a message is printed and
        None is returned without touching any state.

    Returns
    -------
    Whatever the underlying stream's ``next_sample`` returns: the active
    concept's generator outside a transition, ``self.transition_stream``
    while a transition is in progress.
    """
    if batch_size > 1:
        print("Only batch size of 1 for now")
        return None

    # Pick the source before any state is updated for this call.
    source = self.transition_stream if self.in_drift else self.concepts[self.current_concept]
    samples = source.next_sample(batch_size)

    half_window = self.window_size // 2
    self.example_count += batch_size
    seen = self.example_count

    # Fallbacks used when no switch point lies at/behind or at/ahead of `seen`.
    previous_switch = -half_window
    upcoming_switch = self.num_samples + self.window_size
    for switch_at in sorted(self.concept_chain.keys()):
        if switch_at <= seen:
            previous_switch = switch_at
        if switch_at >= seen:
            upcoming_switch = switch_at
            break

    self.drifted = False
    if self.in_drift:
        if seen == upcoming_switch:
            # Exactly on the switch point: the new concept becomes current.
            self.current_concept = self.concept_chain[upcoming_switch]
            self.drifted = True
            self.drift_switch = False
        if seen >= previous_switch + half_window and not self.drift_switch:
            # Half a window past the switch point: the transition is over.
            self.in_drift = False
    elif seen >= upcoming_switch - half_window:
        # Half a window before the next switch point: start blending.
        self.in_drift = True
        self.drift_switch = True
        chain = self.concept_chain
        self.transition_stream = ConceptDriftStream(
            stream=self.concepts[chain[previous_switch]].get_datastream(),
            drift_stream=self.concepts[chain[upcoming_switch]].get_datastream(),
            position=half_window,
            width=self.window_size)
        self.transition_stream.prepare_for_use()

    return samples
def get_conceptdrift_data_generated(self, classification_function=0, noise_percentage=0.1, random_state=112,
                                    drift_classification_function=3, drift_random_state=112,
                                    drift_noise_percentage=0.0, drift_start_position=5000, drift_width=1000,
                                    n_num_features=2, n_cat_features=0):
    """Build a ConceptDriftStream that moves between two AGRAWAL generators.

    Parameters
    ----------
    classification_function, noise_percentage, random_state
        Passed to the initial AGRAWALGenerator as ``classification_function``,
        ``perturbation`` and ``random_state``.
    drift_classification_function, drift_noise_percentage, drift_random_state
        Same three settings for the generator used as ``drift_stream``.
    drift_start_position, drift_width
        Passed to ConceptDriftStream as ``position`` and ``width``.
    n_num_features, n_cat_features
        Accepted but currently unused by this method.

    Returns
    -------
    ConceptDriftStream
        The stream combining the two generators.
    """
    # Imported lazily, so skmultiflow is only required when this is called.
    from skmultiflow.data import AGRAWALGenerator, ConceptDriftStream

    initial_concept = AGRAWALGenerator(
        classification_function=classification_function,
        perturbation=noise_percentage,
        random_state=random_state,
    )
    drifted_concept = AGRAWALGenerator(
        classification_function=drift_classification_function,
        perturbation=drift_noise_percentage,
        random_state=drift_random_state,
    )
    return ConceptDriftStream(
        stream=initial_concept,
        drift_stream=drifted_concept,
        position=drift_start_position,
        width=drift_width,
    )
def test_knn_adwin():
    """Regression test for KNNADWINClassifier on a drifting SEA stream.

    First runs a test-then-train loop over 1000 samples, predicting every
    20th sample (except the very first) before training on it, and checks
    the predictions and the number of hits. It then checks reset(), the
    whitespace-normalised info string, a batch fit / predict round trip and
    the return types of predict / predict_proba.
    """
    stream = ConceptDriftStream(
        stream=SEAGenerator(random_state=1),
        drift_stream=SEAGenerator(random_state=2, classification_function=2),
        random_state=1, position=250, width=10)
    stream.prepare_for_use()

    learner = KNNADWINClassifier(n_neighbors=8, leaf_size=40, max_window_size=200)

    max_samples = 1000
    wait_samples = 20
    predictions = array('i')
    correct_predictions = 0

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples (predict first, then train on the same sample)
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        learner.partial_fit(X, y)

    expected_predictions = array('i', [
        1, 0, 1, 1, 1, 1, 0, 1, 1, 1,
        0, 1, 1, 0, 1, 1, 0, 1, 0, 1,
        1, 1, 1, 0, 1, 0, 0, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        0, 1, 1, 1, 0, 1, 0, 1, 1
    ])
    # np.alltrue was removed in NumPy 2.0; np.all is the supported spelling.
    assert np.all(predictions == expected_predictions)

    expected_correct_predictions = 46
    assert correct_predictions == expected_correct_predictions

    learner.reset()
    assert learner.window.n_samples == 0

    expected_info = 'KNNADWINClassifier(leaf_size=40, max_window_size=200, n_neighbors=8, nominal_attributes=None)'
    # Collapse all whitespace runs (including line breaks) to single spaces;
    # str.split() already yields stripped tokens, so no extra strip is needed.
    info = " ".join(learner.get_info().split())
    assert info == expected_info

    stream.restart()
    X, y = stream.next_sample(max_samples)

    # NOTE(review): sample 950 is in neither the training slice nor the test
    # slice. The expected count below matches this split, so it is kept as is.
    learner.fit(X[:950], y[:950])
    predictions = learner.predict(X[951:])

    correct_predictions = sum(np.array(predictions) == y[951:])
    expected_correct_predictions = 47
    assert correct_predictions == expected_correct_predictions

    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)
def test_concept_drift_stream_with_alpha(test_path):
    """Check ConceptDriftStream when ``alpha`` is given instead of ``width``.

    Verifies the info string of a stream created with ``alpha=0.01`` and that
    creating one with ``alpha=0`` emits a FutureWarning with the expected
    message. ``test_path`` is not used by this test.
    """
    drifting = ConceptDriftStream(alpha=0.01, random_state=1, position=20)

    expected_info = ("ConceptDriftStream(alpha=0.01,\n"
                     " drift_stream=AGRAWALGenerator(balance_classes=False,\n"
                     " classification_function=2,\n"
                     " perturbation=0.0,\n"
                     " random_state=112),\n"
                     " position=20, random_state=1,\n"
                     " stream=AGRAWALGenerator(balance_classes=False,\n"
                     " classification_function=0,\n"
                     " perturbation=0.0, random_state=112),\n"
                     " width=5729)")
    assert drifting.get_info() == expected_info

    expected_message = ("Default value for 'alpha' has changed from 0 "
                        "to None. 'alpha=0' will throw an error from v0.7.0")
    with pytest.warns(FutureWarning) as caught:
        ConceptDriftStream(alpha=0, random_state=1, position=20)
    # Only the first recorded warning is inspected.
    first_message = caught[0].message.args[0]
    assert first_message == expected_message
def test_leverage_bagging_me():
    """Regression test for LeveragingBaggingClassifier with 'leveraging_bag_me'.

    Builds a five-member NaiveBayes ensemble and hands it, together with a
    ConceptDriftStream and the expected predictions, to
    run_prequential_supervised (2000 samples, one check every 40).
    """
    stream = ConceptDriftStream(position=500, width=100, random_state=112)
    nb = NaiveBayes()

    # leveraging_bag_me
    learner = LeveragingBaggingClassifier(base_estimator=nb, n_estimators=5, random_state=112,
                                          leverage_algorithm='leveraging_bag_me')

    # np.int was only an alias of the builtin int and was removed in
    # NumPy 1.24; dtype=int selects the same dtype on every NumPy version.
    y_expected = np.asarray([
        0, 0, 0, 1, 0, 1, 0, 0, 1, 0,
        0, 0, 0, 1, 0, 0, 1, 1, 0, 0,
        1, 0, 0, 0, 1, 1, 0, 1, 0, 1,
        0, 0, 0, 1, 1, 0, 1, 1, 1, 0,
        1, 0, 1, 0, 0, 1, 1, 0, 1, 0
    ], dtype=int)

    run_prequential_supervised(stream, learner, max_samples=2000, n_wait=40, y_expected=y_expected)
def test_concept_drift_stream(test_path):
    """Check metadata and generated samples of a default ConceptDriftStream.

    Verifies the stream's feature/target metadata, its info string and its
    restart behaviour, and compares generated samples with stored references.

    Parameters
    ----------
    test_path : str
        Directory holding 'concept_drift_stream.npz' with arrays 'X' and 'y'.
    """
    stream = ConceptDriftStream(random_state=1, position=20, width=5)
    stream.prepare_for_use()

    assert stream.n_remaining_samples() == -1

    expected_names = ["salary", "commission", "age", "elevel", "car", "zipcode", "hvalue", "hyears", "loan"]
    assert stream.feature_names == expected_names

    expected_targets = [0, 1]
    assert stream.target_values == expected_targets

    assert stream.target_names == ['target']

    assert stream.n_features == 9

    assert stream.n_cat_features == 3

    assert stream.n_num_features == 6

    assert stream.n_targets == 1

    expected_info = ('ConceptDriftStream: '
                     'First Stream: AGRAWALGenerator - '
                     'Drift Stream: AGRAWALGenerator - '
                     'alpha: 0.0 - position: 20 - width: 5')
    assert stream.get_info() == expected_info

    assert stream.has_more_samples() is True

    assert stream.is_restartable() is True

    # Load the stored reference samples
    test_file = os.path.join(test_path, 'concept_drift_stream.npz')
    data = np.load(test_file)
    X_expected = data['X']
    y_expected = data['y']

    # np.alltrue was removed in NumPy 2.0; np.all is the supported spelling.
    X, y = stream.next_sample()
    assert np.all(X[0] == X_expected[0])
    assert np.all(y[0] == y_expected[0])

    # last_sample() must return the sample that was just produced.
    X, y = stream.last_sample()
    assert np.all(X[0] == X_expected[0])
    assert np.all(y[0] == y_expected[0])

    # After a restart, a 30-sample batch must match the full reference arrays.
    stream.restart()
    X, y = stream.next_sample(30)
    assert np.all(X == X_expected)
    assert np.all(y == y_expected)

    assert stream.n_targets == np.array(y).ndim

    assert stream.n_features == X.shape[1]

    assert 'stream' == stream.get_class_type()
# Example script: prequential comparison of a DYNSE ensemble (KNORA-U
# selection) against OzaBagging on a RandomTree stream with one drift.
from dyn2sel.apply_dcs import DYNSEMethod
from dyn2sel.dcs_techniques import KNORAU
from skmultiflow.data import RandomTreeGenerator, ConceptDriftStream
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.bayes import NaiveBayes
from skmultiflow.meta import OzaBagging

# Two differently seeded random-tree concepts joined by a drift at
# position 2500. NOTE(review): width=1 presumably makes the change
# effectively abrupt; confirm against the ConceptDriftStream docs.
generator = ConceptDriftStream(
    stream=RandomTreeGenerator(sample_random_state=42, tree_random_state=42),
    drift_stream=RandomTreeGenerator(sample_random_state=43, tree_random_state=43),
    position=2500,
    width=1,
)

# Both methods use NaiveBayes base learners and are capped at ten members.
# NOTE(review): the positional 200 presumably is DYNSE's chunk size; it
# matches batch_size below. Confirm against the DYNSEMethod signature.
dynse = DYNSEMethod(NaiveBayes(), 200, KNORAU(), max_ensemble_size=10)
ozabag = OzaBagging(NaiveBayes(), n_estimators=10)

# Evaluate both models on the same stream: 5000 samples in batches of 200,
# no pre-training.
evaluator = EvaluatePrequential(max_samples=5000, n_wait=200, batch_size=200, pretrain_size=0)
evaluator.evaluate(generator, [dynse, ozabag], ["Dynse", "Ozabag"])
# Prequential evaluation (accuracy only, no live plot) of a previously built
# model. `dstream` and `ht_class` are defined outside the visible part of
# this file.
evaluate1 = EvaluatePrequential(show_plot=False, pretrain_size=400, max_samples=10000, metrics=['accuracy'])
evaluate1.evaluate(stream=dstream, model=ht_class)

###################################################
# Hoeffding Adaptive tree
from skmultiflow.trees import HoeffdingAdaptiveTreeClassifier
from skmultiflow.data import ConceptDriftStream
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.evaluation import EvaluateHoldout

# Simulate a sample data stream
ds = ConceptDriftStream(random_state=777, position=30000)
# Bare expression: only displays the object's repr in an interactive
# session (see the recorded output below); it has no effect in a script.
ds
# Output:
#ConceptDriftStream(alpha=0.0,
#                   drift_stream=AGRAWALGenerator(balance_classes=False,
#                                                 classification_function=2,
#                                                 perturbation=0.0,
#                                                 random_state=112),
#                   position=30000, random_state=777,
#                   stream=AGRAWALGenerator(balance_classes=False,
#                                           classification_function=0,
#                                           perturbation=0.0, random_state=112),
#                   width=1000)

# Instantiate the model object
model_hat = HoeffdingAdaptiveTreeClassifier()
# stream = ConceptDriftStream(STAGGERGenerator(classification_function=0), # STAGGERGenerator(classification_function=2), # position=n_samples/2, # width=n_samples/5) # streams.append(stream) """Abrupt STAGGER""" # stream = ConceptDriftStream(STAGGERGenerator(classification_function=0), # STAGGERGenerator(classification_function=2), # position=n_samples/2, # alpha=90.0) # streams.append(stream) # """Gradual SEA""" stream = ConceptDriftStream(SEAGenerator(classification_function=0), SEAGenerator(classification_function=2), position=N_SAMPLES/2, width=N_SAMPLES/5) stream.name = 'SEA GRADUAL' STREAMS.append(stream) """Abrupt SEA""" stream = ConceptDriftStream(SEAGenerator(classification_function=0), SEAGenerator(classification_function=1), alpha=90.0, position=N_SAMPLES / 2) stream.name = 'SEA ABRUPBT' STREAMS.append(stream) """GRADUAL LED""" stream = ConceptDriftStream(LEDGeneratorDrift(has_noise=False, noise_percentage=0.0, n_drift_features=3), drift_stream=LEDGeneratorDrift( has_noise=False, noise_percentage=0.0, n_drift_features=7),
class RecurringConceptGradualStream(RecurringConceptStream):
    """Recurring-concept stream whose concept changes are blended gradually.

    Around each switch point of the concept chain, samples are drawn from a
    ConceptDriftStream that mixes the outgoing and incoming concepts over
    ``window_size`` observations instead of switching at once.

    Parameters
    ----------
    rctype, num_samples, noise, concept_chain, seed, desc, boost_first_occurance
        Forwarded unchanged to ``RecurringConceptStream.__init__``.
    window_size : int
        Number of observations each gradual transition is spread over.
    """

    def __init__(self, rctype, num_samples, noise, concept_chain, window_size=1000, seed=None, desc=None,
                 boost_first_occurance=True):
        # Transition bookkeeping is set before the base initialiser runs.
        self.window_size = window_size
        self.transition_stream = None
        self.in_drift = False
        self.drift_switch = False
        super().__init__(rctype, num_samples, noise, concept_chain, seed=seed, desc=desc,
                         boost_first_occurance=boost_first_occurance)

    def next_sample(self, batch_size=1):
        """Return the next sample and advance the gradual-drift state machine.

        Only ``batch_size=1`` is supported; for larger values a message is
        printed and None is returned without touching any state.
        """
        if batch_size > 1:
            print("Only batch size of 1 for now")
            return None

        # Pick the source before any state is updated for this call.
        source = self.transition_stream if self.in_drift else self.concepts[self.current_concept]
        samples = source.next_sample(batch_size)

        half_window = self.window_size // 2
        self.example_count += batch_size
        seen = self.example_count

        # Fallbacks used when no switch point lies at/behind or at/ahead of `seen`.
        previous_switch = -half_window
        upcoming_switch = self.num_samples + self.window_size
        for switch_at in sorted(self.concept_chain.keys()):
            if switch_at <= seen:
                previous_switch = switch_at
            if switch_at >= seen:
                upcoming_switch = switch_at
                break

        self.drifted = False
        if self.in_drift:
            if seen == upcoming_switch:
                # Exactly on the switch point: the new concept becomes current.
                self.current_concept = self.concept_chain[upcoming_switch]
                self.drifted = True
                self.drift_switch = False
            if seen >= previous_switch + half_window and not self.drift_switch:
                # Half a window past the switch point: the transition is over.
                self.in_drift = False
        elif seen >= upcoming_switch - half_window:
            # Half a window before the next switch point: start blending.
            self.in_drift = True
            self.drift_switch = True
            chain = self.concept_chain
            self.transition_stream = ConceptDriftStream(
                stream=self.concepts[chain[previous_switch]].get_datastream(),
                drift_stream=self.concepts[chain[upcoming_switch]].get_datastream(),
                position=half_window,
                width=self.window_size)
            self.transition_stream.prepare_for_use()

        return samples
max_depth = 6 # Max depth for each tree in the ensemble max_window_size = 1000 # Max window size min_window_size = 1 # set to activate the dynamic window strategy detect_drift = False # Enable/disable drift detection AXGBp = AdaptiveXGBoostClassifier(update_strategy='push', n_estimators=n_estimators, learning_rate=learning_rate, max_depth=max_depth, max_window_size=max_window_size, min_window_size=min_window_size, detect_drift=detect_drift) AXGBr = AdaptiveXGBoostClassifier(update_strategy='replace', n_estimators=n_estimators, learning_rate=learning_rate, max_depth=max_depth, max_window_size=max_window_size, min_window_size=min_window_size, detect_drift=detect_drift) stream = ConceptDriftStream(random_state=1000, position=5000) # stream.prepare_for_use() # Required for skmultiflow v0.4.1 evaluator = EvaluatePrequential(pretrain_size=0, max_samples=20000, show_plot=True) evaluator.evaluate(stream=stream, model=[AXGBp, AXGBr], model_names=['AXGBp', 'AXGBr'])
def test_concept_drift_stream(test_path):
    """Check metadata and generated samples of a default ConceptDriftStream.

    Verifies the stream's feature/target metadata, its restart behaviour and
    its info string, and compares generated samples with stored references.

    Parameters
    ----------
    test_path : str
        Directory holding 'concept_drift_stream.npz' with arrays 'X' and 'y'.
    """
    stream = ConceptDriftStream(random_state=1, position=20, width=5)

    assert stream.n_remaining_samples() == -1

    expected_names = ["salary", "commission", "age", "elevel", "car", "zipcode", "hvalue", "hyears", "loan"]
    assert stream.feature_names == expected_names

    expected_targets = [0, 1]
    assert stream.target_values == expected_targets

    assert stream.target_names == ['target']

    assert stream.n_features == 9

    assert stream.n_cat_features == 3

    assert stream.n_num_features == 6

    assert stream.n_targets == 1

    assert stream.has_more_samples() is True

    assert stream.is_restartable() is True

    # Load the stored reference samples
    test_file = os.path.join(test_path, 'concept_drift_stream.npz')
    data = np.load(test_file)
    X_expected = data['X']
    y_expected = data['y']

    # np.alltrue was removed in NumPy 2.0; np.all is the supported spelling.
    X, y = stream.next_sample()
    assert np.all(X[0] == X_expected[0])
    assert np.all(y[0] == y_expected[0])

    # last_sample() must return the sample that was just produced.
    X, y = stream.last_sample()
    assert np.all(X[0] == X_expected[0])
    assert np.all(y[0] == y_expected[0])

    # After a restart, a 30-sample batch must match the full reference arrays.
    stream.restart()
    X, y = stream.next_sample(30)
    assert np.all(X == X_expected)
    assert np.all(y == y_expected)

    assert stream.n_targets == np.array(y).ndim

    assert stream.n_features == X.shape[1]

    assert 'stream' == stream._estimator_type

    expected_info = ("ConceptDriftStream(alpha=0.0,\n"
                     " drift_stream=AGRAWALGenerator(balance_classes=False,\n"
                     " classification_function=2,\n"
                     " perturbation=0.0,\n"
                     " random_state=112),\n"
                     " position=20, random_state=1,\n"
                     " stream=AGRAWALGenerator(balance_classes=False,\n"
                     " classification_function=0,\n"
                     " perturbation=0.0, random_state=112),\n"
                     " width=5)")
    assert stream.get_info() == expected_info
DATASE_SIZE = 25000 def run(generator, filename='generated', n=10000): print(f'Generating dataset from generator ', filename) generator.prepare_for_use() X, y = generator.next_sample(n) X = pd.DataFrame(X) y = pd.DataFrame(y, dtype='int32') df = pd.concat([X, y], axis=1) df.to_csv(f'_datasets/{filename}.csv', header=None, index=None) if __name__ == "__main__": agrawal = ConceptDriftStream(alpha=45.0, position=DATASE_SIZE / 2) run(agrawal, 'agrawal_gen', DATASE_SIZE) sea = ConceptDriftStream(SEAGenerator(classification_function=1, noise_percentage=0.13), SEAGenerator(classification_function=2, noise_percentage=0.13), position=DATASE_SIZE / 2) run(sea, 'sea_gen', DATASE_SIZE) led = LEDGeneratorDrift(has_noise=True, noise_percentage=0.28, n_drift_features=4) run(led, 'led_gen', DATASE_SIZE) stagger = ConceptDriftStream(STAGGERGenerator(classification_function=1,
from skmultiflow.data import ConceptDriftStream from skmultiflow.data import AGRAWALGenerator import logging from GooweMS import GooweMS import random logger = logging.getLogger() logger.setLevel(logging.INFO) # Prepare the data stream stream_1 = ConceptDriftStream( stream=AGRAWALGenerator(balance_classes=False, classification_function=1, perturbation=0.0, random_state=112), drift_stream=AGRAWALGenerator(balance_classes=False, classification_function=2, perturbation=0.0, random_state=112), position=3000, width=1000, random_state=None, alpha=0.0) stream_2 = ConceptDriftStream( stream=AGRAWALGenerator(balance_classes=False, classification_function=3, perturbation=0.0, random_state=21), drift_stream=AGRAWALGenerator(balance_classes=False, classification_function=1, perturbation=0.0, random_state=22),
class RecurringConceptGradualStream(RecurringConceptStream):
    """A stream featuring gradual drift between given concepts.

    Uses the scikit-multiflow concept drift stream to blend
    concepts over a window.

    Parameters
    ----------
    rctype: RCStreamType
        An enum describing the type of stream.

    num_samples: int
        The number of samples in the stream.

    noise: float
        The probability that noise will happen in the generation. At each
        new sample generated, the sample will be perturbed by the amount of
        perturbation. Values go from 0.0 to 1.0.

    concept_chain: list<int> or dict
        Either a dict mapping an observation number to the concept
        beginning at that observation, or a list of concept ids. For a list,
        a dict will be generated with each concept lasting the length given
        in desc, or a uniform length.

    window_size: int
        The number of observations each gradual drift is spread over.

    seed: int
        Random seed.

    desc: dict<int><conceptOccurence>
        A map of concept ID to options.

    boost_first_occurance: bool
        If true, double the observations drawn from the first
        occurrence of a concept. Allows a better model to be built
        and stored.

    Examples
    --------
    >>> # An example stream using the STAGGER Generator.
    >>> # Starts using generating function 0, then at
    >>> # observation 5000 transitions to generating function
    >>> # 1 then at 10000 transitions back to 0.
    >>> from skika.data.synthetic.reccurring_concept_stream import RCStreamType, RecurringConceptGradualStream, conceptOccurence
    >>> concept_chain = {0: 0, 5000: 1, 10000: 0}
    >>> num_samples = 15000
    >>> # init concept
    >>> concept_0 = conceptOccurence(id = 0, difficulty = 2, noise = 0, appearences = 2, examples_per_appearence = 5000)
    >>> concept_1 = conceptOccurence(id = 1, difficulty = 3, noise = 0, appearences = 1, examples_per_appearence = 5000)
    >>> desc = {0: concept_0, 1: concept_1}
    >>> datastream = RecurringConceptGradualStream(
    ...                 rctype = RCStreamType.STAGGER,
    ...                 num_samples =num_samples,
    ...                 noise = 0,
    ...                 concept_chain = concept_chain,
    ...                 window_size = 1000,
    ...                 seed = 42,
    ...                 desc = desc,
    ...                 boost_first_occurance = False)
    >>> datastream.has_more_samples()
    True
    >>> datastream.get_drift_info()
    {0: 0, 5000: 1, 10000: 0}
    >>> datastream.n_remaining_samples()
    15000
    >>> datastream.get_stream_info()
    {0: 0, 5000: 1, 10000: 0}
    0 - 5000: STAGGERGenerator(balance_classes=False, classification_function=0, random_state=42)
    5000 - 10000: STAGGERGenerator(balance_classes=False, classification_function=1, random_state=43)
    10000 - 15000: STAGGERGenerator(balance_classes=False, classification_function=0, random_state=42)
    >>> datastream.get_moa_stream_info()
    {0: 0, 5000: 1, 10000: 0}
    '(ConceptDriftStream -s (generators.STAGGERGenerator -f 1 -i 42) -d (ConceptDriftStream -s (generators.STAGGERGenerator -f 2 -i 43) -d (generators.STAGGERGenerator -f 1 -i 42) -p 5000 -w 1) -p 5000 -w 1)'
    >>> datastream.get_supplementary_info()
    >>> datastream.next_sample()
    (array([[2., 0., 2.]]), array([0]))
    >>> datastream.n_remaining_samples()
    14999
    >>> datastream.next_sample()
    (array([[2., 0., 0.]]), array([0]))
    >>> datastream.n_remaining_samples()
    14998
    """

    def __init__(self, rctype, num_samples, noise, concept_chain, window_size=1000, seed=None, desc=None,
                 boost_first_occurance=True):
        # Transition bookkeeping is set before the base initialiser runs.
        self.window_size = window_size
        self.transition_stream = None
        self.in_drift = False
        self.drift_switch = False
        super().__init__(rctype, num_samples, noise, concept_chain, seed=seed, desc=desc,
                         boost_first_occurance=boost_first_occurance)

    def next_sample(self, batch_size=1):
        """Return the next sample and advance the gradual-drift state machine.

        Only ``batch_size=1`` is supported; for larger values a message is
        printed and None is returned without touching any state.
        """
        if batch_size > 1:
            print("Only batch size of 1 for now")
            return None

        # Pick the source before any state is updated for this call.
        source = self.transition_stream if self.in_drift else self.concepts[self.current_concept]
        samples = source.next_sample(batch_size)

        half_window = self.window_size // 2
        self.example_count += batch_size
        seen = self.example_count

        # Fallbacks used when no switch point lies at/behind or at/ahead of `seen`.
        previous_switch = -half_window
        upcoming_switch = self.num_samples + self.window_size
        for switch_at in sorted(self.concept_chain.keys()):
            if switch_at <= seen:
                previous_switch = switch_at
            if switch_at >= seen:
                upcoming_switch = switch_at
                break

        self.drifted = False
        if self.in_drift:
            if seen == upcoming_switch:
                # Exactly on the switch point: the new concept becomes current.
                self.current_concept = self.concept_chain[upcoming_switch]
                self.drifted = True
                self.drift_switch = False
            if seen >= previous_switch + half_window and not self.drift_switch:
                # Half a window past the switch point: the transition is over.
                self.in_drift = False
        elif seen >= upcoming_switch - half_window:
            # Half a window before the next switch point: start blending.
            self.in_drift = True
            self.drift_switch = True
            chain = self.concept_chain
            self.transition_stream = ConceptDriftStream(
                stream=self.concepts[chain[previous_switch]].get_datastream(),
                drift_stream=self.concepts[chain[upcoming_switch]].get_datastream(),
                position=half_window,
                width=self.window_size)
            self.transition_stream.prepare_for_use()

        return samples
def calculateStatistic(array):
    """Return the average and standard deviation of ``array`` as ``(avg, std)``."""
    return np.average(array), np.std(array)


# One drift stream per random seed 1..24, all sharing the same drift settings.
randomStates = np.arange(1, 25)
DRIFT_CENTRAL = 8000
DRIFT_WIDTH = 1000
DRIFT_BORDER = np.round(DRIFT_CENTRAL - DRIFT_WIDTH / 2)
driftStreams = [
    ConceptDriftStream(width=DRIFT_WIDTH, position=DRIFT_BORDER + 2000, random_state=i)
    for i in randomStates
]

## EXPERIMENT 1
# Candidate parameter values for the drift detectors under comparison.
adwin_param = [0.002, 0.005, 0.01]
ddm_param = [3, 5, 7]
ks_param1 = [100, 150, 200]
ks_param2 = [30, 50, 100]
ph_param1 = [25, 50, 75]
ph_param2 = [0.005, 0.01, 0.02]

knn = KNNClassifier()
stream = driftStreams[0]
# Imports
from skmultiflow.data.sine_generator import SineGenerator
import matplotlib.pyplot as plt
from skmultiflow.data import ConceptDriftStream
import numpy as np

# Setting up the stream
# stream = SineGenerator(classification_function = 2, random_state = 112,
#                        balance_classes = False, has_noise = False)
stream = ConceptDriftStream(random_state=123456, position=25000)

# Retrieving a batch of 100 samples; a[0] holds the features, a[1] the targets
# stream.generate_drift()
a = stream.next_sample(100)

# Flatten every feature value into one flat list of Python scalars, in the
# same order that a[0].item(i) for i in range(a[0].size) would visit them.
b = np.asarray(a[0]).ravel().tolist()

plt.plot(b)
# fig= plt.gcf()
# fig.set_size_inches(20, 5.5)
# plt.ylabel('value')
# plt.xlabel('Time')
# plt.show()
# b=[]
# for i in range(a[1].size):
#     b.append(a[1].item(i))
#
# plt.plot(b,color='r', linestyle='--',linewidth=0.3)
fig = plt.gcf()
fig.set_size_inches(20, 5.5)