def test_vfdr():
    """Regression test for ``VFDR`` with ordered rules and info-gain expansion.

    Feeds 5000 samples from ``AGRAWALGenerator(random_state=11)`` to the
    learner, asking for a prediction (and class probabilities) on every 100th
    sample except the very first one, *before* training on that sample.
    The collected predictions, ``get_info()``, the model description and the
    model measurements are compared with recorded reference values.
    """
    learner = VFDR(ordered_rules=True,
                   rule_prediction='first_hit',
                   nominal_attributes=[3, 4, 5],
                   expand_criterion='info_gain',
                   remove_poor_atts=True,
                   min_weight=100,
                   nb_prediction=False)

    stream = AGRAWALGenerator(random_state=11)
    stream.prepare_for_use()

    max_samples = 5000
    wait_samples = 100
    predictions = array('i')
    # Probabilities are collected only to exercise predict_proba(); they are
    # not compared against reference values.
    proba_predictions = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            proba_predictions.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                       0, 0, 0, 0, 1, 0, 1, 0, 0, 1,
                                       0, 0, 0, 1, 0, 1, 0, 1, 0, 0,
                                       1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
                                       1, 0, 1, 0, 0, 0, 0, 0, 0])
    # np.alltrue was removed in NumPy 2.0; np.all is the supported spelling.
    assert np.all(predictions == expected_predictions)

    expected_info = (
        'VFDR: ordered_rules: True - grace_period: 200 - split_confidence: 1e-07 '
        '- tie_threshold: 0.05 - remove_poor_atts: True - rule_prediction: first_hit '
        '- nb_threshold: 0 - nominal_attributes: [3, 4, 5] - drift_detector: NoneType '
        '- Predict using Naive Bayes: False'
    )
    assert learner.get_info() == expected_info

    # Two accepted renderings: they differ only in the digits printed for one
    # float in "Rule 2".
    expected_model_description = (
        'Rule 0 :Att (2) <= 39.550| class :0 {0: 1365.7101742993455}\n'
        'Rule 1 :Att (2) <= 58.180| class :1 {1: 1269.7307449971418}\n'
        'Rule 2 :Att (2) <= 60.910| class :0 {0: 66.24158839706533, 1: 54.0}\n'
        'Default Rule :| class :0 {0: 1316.7584116029348}'
    )
    expected_model_description_ = (
        'Rule 0 :Att (2) <= 39.550| class :0 {0: 1365.7101742993455}\n'
        'Rule 1 :Att (2) <= 58.180| class :1 {1: 1269.7307449971418}\n'
        'Rule 2 :Att (2) <= 60.910| class :0 {0: 66.241588397065328, 1: 54.0}\n'
        'Default Rule :| class :0 {0: 1316.7584116029348}'
    )
    assert learner.get_model_description() in (expected_model_description,
                                               expected_model_description_)

    expected_model_measurements = {'Number of rules: ': 3, 'model_size in bytes': 62295}
    expected_model_measurements_ = {'Number of rules: ': 3, 'model_size in bytes': 73167}
    # The size check is skipped when the interpreter's minor version is 6.
    if sys.version_info.minor != 6:
        assert learner.get_model_measurements() in (expected_model_measurements,
                                                    expected_model_measurements_)
def test_vfdr_hellinger():
    """Regression test for ``VFDR`` with unordered rules and Hellinger expansion.

    Feeds 5000 samples from ``AGRAWALGenerator(random_state=11)`` to the
    learner, asking for a prediction (and class probabilities) on every 100th
    sample except the very first one, *before* training on that sample.
    The collected predictions and the model description are compared with
    recorded reference values.
    """
    learner = VFDR(ordered_rules=False,
                   rule_prediction='weighted_sum',
                   nominal_attributes=[3, 4, 5],
                   expand_criterion='hellinger',
                   remove_poor_atts=True,
                   min_weight=100,
                   nb_prediction=True)

    stream = AGRAWALGenerator(random_state=11)
    stream.prepare_for_use()

    max_samples = 5000
    wait_samples = 100
    predictions = array('i')
    # Probabilities are collected only to exercise predict_proba(); they are
    # not compared against reference values.
    proba_predictions = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            proba_predictions.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [0, 0, 0, 1, 0, 0, 0, 0, 1, 0,
                                       0, 0, 0, 0, 1, 0, 1, 0, 0, 1,
                                       0, 0, 0, 1, 0, 1, 0, 1, 0, 0,
                                       1, 1, 0, 1, 0, 0, 0, 0, 1, 1,
                                       1, 0, 1, 1, 0, 0, 0, 0, 0])
    # np.alltrue was removed in NumPy 2.0; np.all is the supported spelling.
    assert np.all(predictions == expected_predictions)

    # Two accepted renderings: they differ only in the digits printed for the
    # last float of the default rule.
    expected_model_description = (
        'Rule 0 :Att (2) > 58.180 and Att (5) = 4.000| class :0 {0: 202.0, 1: 3.0}\n'
        'Rule 1 :Att (2) <= 41.820| class :0 {0: 1387.1186637804824, 1: 151.83928023717402}\n'
        'Default Rule :| class :1 {0: 512.8813362195176, 1: 1356.160719762826}'
    )
    expected_model_description_ = (
        'Rule 0 :Att (2) > 58.180 and Att (5) = 4.000| class :0 {0: 202.0, 1: 3.0}\n'
        'Rule 1 :Att (2) <= 41.820| class :0 {0: 1387.1186637804824, 1: 151.83928023717402}\n'
        'Default Rule :| class :1 {0: 512.8813362195176, 1: 1356.1607197628259}'
    )
    # The description check is skipped when the interpreter's minor version is 6.
    if sys.version_info.minor != 6:
        assert learner.get_model_description() in (expected_model_description,
                                                   expected_model_description_)
def test_vfdr_foil():
    """Regression test for ``VFDR`` with unordered rules and FOIL-gain expansion.

    Feeds 5000 samples from ``AGRAWALGenerator(random_state=11)`` to the
    learner, asking for a prediction (and class probabilities) on every 100th
    sample except the very first one, *before* training on that sample.
    The collected predictions and the model description are compared with
    recorded reference values.
    """
    learner = VFDR(ordered_rules=False,
                   rule_prediction='weighted_sum',
                   nominal_attributes=[3, 4, 5],
                   expand_criterion='foil_gain',
                   remove_poor_atts=True,
                   min_weight=100,
                   nb_prediction=True)

    stream = AGRAWALGenerator(random_state=11)
    stream.prepare_for_use()

    max_samples = 5000
    wait_samples = 100
    predictions = array('i')
    # Probabilities are collected only to exercise predict_proba(); they are
    # not compared against reference values.
    proba_predictions = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            proba_predictions.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
                                       0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
                                       0, 0, 0, 0, 0, 1, 0, 1, 0, 0,
                                       1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                                       0, 0, 1, 0, 0, 0, 0, 0, 0])
    # np.alltrue was removed in NumPy 2.0; np.all is the supported spelling.
    assert np.all(predictions == expected_predictions)

    expected_model_description = (
        'Rule 0 :Att (2) <= 25.450 | class: 1| class :0 {0: 464.44730579120136}\n'
        'Rule 1 :Att (4) = 3.000 | class: 0| class :0 {0: 95.0, 1: 45.0}\n'
        'Rule 2 :Att (2) <= 30.910 | class: 1| class :0 {0: 330.68821225514125}\n'
        'Default Rule :| class :0 {0: 573.0, 1: 336.0}'
    )
    assert learner.get_model_description() == expected_model_description
def test_vfdr_info_gain():
    """Regression test for ``VeryFastDecisionRulesClassifier`` with info gain.

    Feeds 5000 samples from ``AGRAWALGenerator(random_state=11)`` to the
    learner, asking for a prediction (and class probabilities) on every 100th
    sample except the very first one, *before* training on that sample.
    The collected predictions, the whitespace-normalised ``get_info()`` text,
    the model description and (on Linux only) the model measurements are
    compared with recorded reference values.
    """
    learner = VeryFastDecisionRulesClassifier(ordered_rules=True,
                                              rule_prediction='first_hit',
                                              nominal_attributes=[3, 4, 5],
                                              expand_criterion='info_gain',
                                              remove_poor_atts=True,
                                              min_weight=100,
                                              nb_prediction=False)

    stream = AGRAWALGenerator(random_state=11)
    stream.prepare_for_use()

    max_samples = 5000
    wait_samples = 100
    predictions = array('i')
    # Probabilities are collected only to exercise predict_proba(); they are
    # not compared against reference values.
    proba_predictions = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            proba_predictions.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                       0, 0, 0, 0, 1, 0, 1, 0, 0, 1,
                                       0, 0, 0, 1, 0, 1, 0, 1, 0, 0,
                                       1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
                                       1, 0, 1, 0, 0, 0, 0, 0, 0])
    # np.alltrue was removed in NumPy 2.0; np.all is the supported spelling.
    assert np.all(predictions == expected_predictions)

    expected_info = (
        "VeryFastDecisionRulesClassifier(drift_detector=None, expand_confidence=1e-07, "
        "expand_criterion='info_gain', grace_period=200, max_rules=1000, min_weight=100, "
        "nb_prediction=False, nb_threshold=0, nominal_attributes=[3, 4, 5], ordered_rules=True, "
        "remove_poor_atts=True, rule_prediction='first_hit', tie_threshold=0.05)"
    )
    # Collapse every whitespace run (including line breaks) to a single space;
    # str.split() with no arguments already yields stripped tokens.
    info = " ".join(learner.get_info().split())
    assert info == expected_info

    # Two accepted renderings: they differ only in the digits printed for one
    # float in "Rule 2".
    expected_model_description_1 = (
        'Rule 0 :Att (2) <= 39.550| class :0 {0: 1365.7101742993455}\n'
        'Rule 1 :Att (2) <= 58.180| class :1 {1: 1269.7307449971418}\n'
        'Rule 2 :Att (2) <= 60.910| class :0 {0: 66.24158839706533, 1: 54.0}\n'
        'Default Rule :| class :0 {0: 1316.7584116029348}'
    )
    expected_model_description_2 = (
        'Rule 0 :Att (2) <= 39.550| class :0 {0: 1365.7101742993455}\n'
        'Rule 1 :Att (2) <= 58.180| class :1 {1: 1269.7307449971418}\n'
        'Rule 2 :Att (2) <= 60.910| class :0 {0: 66.241588397065328, 1: 54.0}\n'
        'Default Rule :| class :0 {0: 1316.7584116029348}'
    )
    assert learner.get_model_description() in (expected_model_description_1,
                                               expected_model_description_2)

    expected_model_measurements_1 = {'Number of rules: ': 3, 'model_size in bytes': 61735}
    expected_model_measurements_2 = {'Number of rules: ': 3, 'model_size in bytes': 72607}
    if sys.platform == 'linux':
        assert learner.get_model_measurements() in (expected_model_measurements_1,
                                                    expected_model_measurements_2)
    else:
        # run for coverage
        learner.get_model_measurements()
def prepare_for_use(self):
    """Create the underlying generators and mirror the active one's metadata.

    Steps, in order:

    1. For the generators listed below, overwrite ``self.concepts`` with the
       preset list of concept ids; other generator names leave it untouched.
    2. Fill ``self.streams`` with prepared generators: one AGRAWAL generator
       per entry of ``self.all_concepts`` when ``self.concept_shift_step`` is
       positive, otherwise one generator of the configured kind per entry of
       ``self.concepts``. An unrecognised generator name is reported on
       stdout and the process exits.
    3. Select the first two streams as ``cur_stream`` / ``drift_stream``.
    4. Copy the descriptive attributes of ``cur_stream`` onto ``self`` and
       compute ``self.concept_probs``.
    """
    # Generators whose concept ids are simply 0 .. count-1.
    preset_counts = {'sea': 4, 'sine': 4, 'stagger': 3, 'mixed': 2, 'led': 7}
    if self.generator in preset_counts:
        self.concepts = list(range(preset_counts[self.generator]))
    elif self.generator == 'tree':
        self.concepts = list(range(2, 11))

    if self.concept_shift_step > 0:
        for shifted_concept in self.all_concepts:
            generator = AGRAWALGenerator(classification_function=shifted_concept,
                                         random_state=self.random_state,
                                         balance_classes=False,
                                         perturbation=0.05)
            generator.prepare_for_use()
            self.streams.append(generator)
    else:
        # One factory per supported generator name; each takes the concept id.
        factories = {
            'agrawal': lambda c: AGRAWALGenerator(classification_function=c,
                                                  random_state=self.random_state,
                                                  balance_classes=False,
                                                  perturbation=0.05),
            'sea': lambda c: SEAGenerator(classification_function=c,
                                          random_state=self.random_state,
                                          balance_classes=False,
                                          noise_percentage=0.05),
            'sine': lambda c: SineGenerator(classification_function=c,
                                            random_state=self.random_state,
                                            balance_classes=False,
                                            has_noise=False),
            'stagger': lambda c: STAGGERGenerator(classification_function=c,
                                                  random_state=self.random_state,
                                                  balance_classes=False),
            'mixed': lambda c: MIXEDGenerator(classification_function=c,
                                              random_state=self.random_state,
                                              balance_classes=False),
            'led': lambda c: LEDGeneratorDrift(random_state=self.random_state,
                                               has_noise=True,
                                               n_drift_features=c),
            'tree': lambda c: RandomTreeGenerator(tree_random_state=c,
                                                  sample_random_state=c,
                                                  max_tree_depth=c+2,
                                                  min_leaf_depth=c,
                                                  n_classes=2),
        }
        for concept_id in self.concepts:
            build = factories.get(self.generator)
            if build is None:
                print(f"unknown stream generator {self.generator}")
                exit()
            generator = build(concept_id)
            generator.prepare_for_use()
            self.streams.append(generator)

    self.cur_stream = self.streams[0]
    self.drift_stream = self.streams[1]

    active = self.cur_stream
    # n_targets appears twice so the sequence of attribute writes is unchanged.
    mirrored = ('n_samples', 'n_targets', 'n_features', 'n_num_features',
                'n_cat_features', 'n_classes', 'cat_features_idx',
                'feature_names', 'target_names', 'target_values', 'n_targets')
    for attr in mirrored:
        setattr(self, attr, getattr(active, attr))
    self.name = 'drifting' + active.name

    n_concepts = len(self.concepts)
    print(f"len: {n_concepts}")
    self.concept_probs = self.__get_poisson_probs(len(self.concepts), self.lam)
Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1uHKbJ3KLUITTHJRxegzbTvA_-6M7eO5v
"""

# NOTE(review): `!pip ...` is IPython/Colab shell syntax, not Python; this
# line only works when the file is executed as a notebook cell.
!pip install -U scikit-multiflow

from skmultiflow.data import AGRAWALGenerator
from skmultiflow.trees import HoeffdingTree
from skmultiflow.evaluation import EvaluatePrequential
import numpy as np

# 1. Create a stream
stream = AGRAWALGenerator()
stream.prepare_for_use()

# 2. Instantiate the HoeffdingTree classifier
ht = HoeffdingTree()

# # 3. Setup the evaluator
# evaluator = EvaluatePrequential(show_plot=False,
#                                 pretrain_size=500,
#                                 max_samples=500)
# # 4. Run evaluation
# evaluator.evaluate(stream=stream, model=ht)

def base_classifier(e, U, I, L, D, wd, ws):
    """Placeholder: print ``"I am here"`` and return the result of ``print``.

    None of the seven parameters is read by the body, and because ``print``
    returns ``None`` the function always returns ``None``.
    NOTE(review): the intended meaning of the parameters is not visible
    here - confirm against the notebook this was exported from.
    """
    return print("I am here")