def test_oracle_better():
    gen = SEAGenerator(random_state=42)
    gen.prepare_for_use()

    def extract_accuracy(output):
        # Parse the value from the "Accuracy: <value>" line that the
        # evaluator prints to stdout.
        acc = output[output.find("Accuracy"):]
        acc = acc[acc.find(":") + 2:]
        acc = acc[:acc.find("\n")]
        return float(acc)

    evaluator_dyn2sel = dyn2selPrequential(
        n_wait=100, max_samples=1000, pretrain_size=0
    )
    dynse_rank = DYNSEMethod(NaiveBayes(), 100, Rank())
    f = StringIO()
    with redirect_stdout(f):
        evaluator_dyn2sel.evaluate(gen, dynse_rank)
    acc_rank = extract_accuracy(f.getvalue())
    f.close()

    evaluator_dyn2sel = dyn2selPrequential(
        n_wait=100, max_samples=1000, pretrain_size=0
    )
    dynse_oracle = DYNSEMethod(NaiveBayes(), 100, Oracle())
    f = StringIO()
    with redirect_stdout(f):
        evaluator_dyn2sel.evaluate(gen, dynse_oracle)
    acc_oracle = extract_accuracy(f.getvalue())
    f.close()

    # The Oracle selector consults the true labels, so it should outperform Rank.
    assert acc_oracle > acc_rank
class Bayes(IncrementalClassifier):
    def __init__(self):
        super().__init__()
        self.clf = NaiveBayes()

    def partial_fit(self, one_row):
        # one_row is a single (features, label) pair; wrap each element in a
        # list because NaiveBayes.partial_fit expects batches.
        self.clf.partial_fit([one_row[0]], [one_row[1]])

    def predict(self, x):
        return self.clf.predict(x)
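# A minimal usage sketch for the Bayes wrapper above; the stream setup is an
# illustrative assumption that only reuses names (SEAGenerator, NaiveBayes)
# already appearing in this file.
gen = SEAGenerator(random_state=42)
clf = Bayes()
for _ in range(100):
    X, y = gen.next_sample()
    clf.partial_fit((X[0], y[0]))  # one (features, label) pair per call
X_next, _ = gen.next_sample()
print(clf.predict(X_next))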
def test_equality_multiflow():
    gen = SEAGenerator(random_state=42)
    gen.prepare_for_use()
    evaluator_mtflow = mtflowPrequential(
        max_samples=1000, pretrain_size=0, restart_stream=True
    )
    evaluator_dyn2sel = dyn2selPrequential(max_samples=1000, pretrain_size=0)
    nb_mtflow = evaluator_mtflow.evaluate(gen, NaiveBayes())[0].__dict__
    nb_dyn2sel = evaluator_dyn2sel.evaluate(gen, NaiveBayes())[0].__dict__
    # The attribute observers are objects without value-based equality,
    # so exclude them before comparing the remaining state.
    del nb_mtflow["_attribute_observers"]
    del nb_dyn2sel["_attribute_observers"]
    assert nb_mtflow == nb_dyn2sel
def test_pretrain_size():
    gen = SEAGenerator(random_state=42)
    gen.prepare_for_use()
    evaluator_dyn2sel = dyn2selPrequential(
        n_wait=100, max_samples=1000, pretrain_size=150
    )
    dynse_rank = DYNSEMethod(NaiveBayes(), 100, Rank())
    evaluator_dyn2sel.evaluate(gen, dynse_rank)

    evaluator_dyn2sel = dyn2selPrequential(
        n_wait=100, max_samples=1000, pretrain_size=150
    )
    dynse_oracle = DYNSEMethod(NaiveBayes(), 100, Oracle())
    evaluator_dyn2sel.evaluate(gen, dynse_oracle)
def __init__(self, n_estimators=5, base_estimator=NaiveBayes(), beta=0.8,
             gamma=0.1, pruning='weakest'):
    """ Creates a new instance of AdditiveExpertEnsembleClassifier. """
    super().__init__()

    self.n_estimators = n_estimators
    self.base_estimator = base_estimator
    self.beta = beta
    self.gamma = gamma
    self.pruning = pruning
    assert self.pruning in ('weakest', 'oldest'), \
        'Unknown pruning strategy: {}'.format(self.pruning)

    # Following attributes are set later
    self.epochs = None
    self.num_classes = None
    self.experts = None

    self.reset()
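# A hedged instantiation sketch for the constructor above; the values and the
# comments on parameter roles are illustrative assumptions, not defaults or
# recommendations from any experiment.
ens = AdditiveExpertEnsembleClassifier(
    n_estimators=10,
    base_estimator=NaiveBayes(),
    beta=0.8,           # weight decay applied to experts that err (AddExp-style)
    gamma=0.1,          # relative weight given to a newly spawned expert
    pruning='oldest',   # the other accepted strategy is 'weakest'
)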
def test_evaluate_delayed_coverage(tmpdir):
    from skmultiflow.data import SEAGenerator
    from skmultiflow.bayes import NaiveBayes

    max_samples = 1000

    # Stream
    data = SEAGenerator(random_state=1)
    # Get X and y
    X, y = data.next_sample(max_samples)
    time = generate_random_dates(seed=1, samples=max_samples)

    # Setup temporal stream
    stream = TemporalDataStream(X, y, time, ordered=False)

    # Learner
    nb = NaiveBayes()

    output_file = os.path.join(str(tmpdir), "prequential_delayed_summary.csv")
    metrics = ['running_time', 'model_size']
    evaluator = EvaluatePrequentialDelayed(max_samples=max_samples,
                                           metrics=metrics,
                                           data_points_for_classification=True,
                                           output_file=output_file)

    evaluator.evaluate(stream=stream, model=nb, model_names=['NB'])
def __init__(self, n_estimators=10, n_kept_estimators=30,
             base_estimator=NaiveBayes(), window_size=200, n_splits=5):
    """ Create a new ensemble """
    super().__init__()

    # top K classifiers
    self.n_estimators = n_estimators

    # total number of classifiers to keep
    self.n_kept_estimators = n_kept_estimators

    # base learner
    self.base_estimator = base_estimator

    # the ensemble in which the classifiers are sorted by their weight
    self.models_pool = []

    # cross validation fold
    self.n_splits = n_splits

    # chunk-related information
    self.window_size = window_size  # chunk size
    self.p = -1                     # chunk pointer
    self.X_chunk = None
    self.y_chunk = None
def AdditiveExpertEnsemble(n_estimators=5, base_estimator=NaiveBayes(), beta=0.8,
                           gamma=0.1, pruning='weakest'):   # pragma: no cover
    warnings.warn("'AdditiveExpertEnsemble' has been renamed to 'AdditiveExpertEnsembleClassifier' in v0.5.0.\n"
                  "The old name will be removed in v0.7.0", category=FutureWarning)
    return AdditiveExpertEnsembleClassifier(n_estimators=n_estimators,
                                            base_estimator=base_estimator,
                                            beta=beta,
                                            gamma=gamma,
                                            pruning=pruning)
def DynamicWeightedMajority(n_estimators=5, base_estimator=NaiveBayes(), period=50,
                            beta=0.5, theta=0.01):   # pragma: no cover
    warnings.warn("'DynamicWeightedMajority' has been renamed to 'DynamicWeightedMajorityClassifier' in v0.5.0.\n"
                  "The old name will be removed in v0.7.0", category=FutureWarning)
    return DynamicWeightedMajorityClassifier(n_estimators=n_estimators,
                                             base_estimator=base_estimator,
                                             period=period,
                                             beta=beta,
                                             theta=theta)
def AccuracyWeightedEnsemble(n_estimators=10, n_kept_estimators=30,
                             base_estimator=NaiveBayes(), window_size=200,
                             n_splits=5):   # pragma: no cover
    warnings.warn("'AccuracyWeightedEnsemble' has been renamed to 'AccuracyWeightedEnsembleClassifier' in v0.5.0.\n"
                  "The old name will be removed in v0.7.0", category=FutureWarning)
    return AccuracyWeightedEnsembleClassifier(n_estimators=n_estimators,
                                              n_kept_estimators=n_kept_estimators,
                                              base_estimator=base_estimator,
                                              window_size=window_size,
                                              n_splits=n_splits)
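# A small sketch of how the deprecation aliases above behave; it assumes only
# the standard-library warnings module and the aliases defined in this file.
import warnings

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    clf = DynamicWeightedMajority()  # old name still constructs the new class...
assert isinstance(clf, DynamicWeightedMajorityClassifier)
assert any(issubclass(w.category, FutureWarning) for w in caught)  # ...but warns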
def __init__(self, n_estimators=5, base_estimator=NaiveBayes(), period=50, beta=0.5,
             theta=0.01):
    """ Creates a new instance of DynamicWeightedMajority. """
    super().__init__(n_estimators, base_estimator, period, beta, theta)
def evaluation_Naive_Bayes():
    # List of classification algorithms to be tested
    classifiers = [NaiveBayes()]
    cv = CrossValidation(clfs=classifiers, max_samples=1000000, test_size=1)
    # Initialize the stream generators from the scikit-multiflow package
    cv.streams = (init_standard_streams_naive_bayes()
                  + init_real_world_naive_bayes()
                  + cv.init_reoccuring_streams())
    cv.test()
    cv.save_summary()
def add_element(self, X, y):
    if self.in_concept_change:
        self.reset()
    X, y = np.asarray(X), np.asarray(y)

    # Phase 1: collect the first n samples and train the initial learner.
    if (not self.trained) and len(self.d_train_X) < self.n:
        self.d_train_X.append(X)
        self.d_train_y.append(y)
        if len(self.d_train_X) == self.n:
            self.l.partial_fit(np.asarray(self.d_train_X), np.asarray(self.d_train_y))
            self.trained = True
        return

    # Phase 2: keep filling the training window up to w samples.
    if len(self.d_train_X) < self.w:
        self.d_train_X.append(X)
        self.d_train_y.append(y)
        return

    # Phase 3: buffer incoming samples; once the buffer holds w samples,
    # re-select the training set and retrain the learner from scratch.
    self.d_buffer_X.append(X)
    self.d_buffer_y.append(y)
    if len(self.d_buffer_X) < self.w:
        return

    self.d_train_X, self.d_train_y = self.ldd_dis(np.asarray(self.d_train_X),
                                                  np.asarray(self.d_train_y),
                                                  np.asarray(self.d_buffer_X),
                                                  np.asarray(self.d_buffer_y))
    self.l = NaiveBayes()
    self.l.fit(self.d_train_X, self.d_train_y)
    self.d_train_X = self.d_train_X.tolist()
    self.d_train_y = self.d_train_y.tolist()
    self.d_buffer_X = []
    self.d_buffer_y = []
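# A hedged usage sketch for a detector exposing the add_element(X, y) method
# above. The class name LDD_DIS is a placeholder assumption; the attributes it
# relies on (in_concept_change, reset) follow the skmultiflow drift-detector
# convention already used inside the method.
gen = SEAGenerator(random_state=42)
detector = LDD_DIS()  # hypothetical constructor
for _ in range(3000):
    X, y = gen.next_sample()
    detector.add_element(X[0], y[0])
    if detector.in_concept_change:
        print("drift handled: training window re-selected, learner retrained")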
def __init__(self, n_estimators=10, n_kept_estimators=30, base_estimator=NaiveBayes(),
             window_size=200, n_splits=5):
    """ Create a new ensemble """
    super().__init__(n_estimators, n_kept_estimators, base_estimator,
                     window_size, n_splits)
def test_ensemble_size():
    # Since each member of the ensemble is initialized once the number of
    # instances reaches the chunk size, the ensemble size should be
    # n_samples // chunk_size.
    chunk_size = 100
    n_samples = 1050
    gen = SEAGenerator(balance_classes=True)
    mde = MDEMethod(NaiveBayes(), chunk_size, KNORAE(), alpha=0.0)
    X, y = gen.next_sample(n_samples)
    mde.partial_fit(X, y)
    assert len(mde.ensemble) == n_samples // chunk_size
def test_ensemble_size():
    # Since each member of the ensemble is initialized once the number of
    # instances reaches the chunk size, the ensemble size should be
    # n_samples // chunk_size.
    chunk_size = 100
    n_samples = 1050
    gen = SEAGenerator()
    dynse = DYNSEMethod(NaiveBayes(), chunk_size, ModifiedRank())
    X, y = gen.next_sample(n_samples)
    dynse.partial_fit(X, y)
    assert len(dynse.ensemble) == n_samples // chunk_size
def test_clone():
    stream = SEAGenerator(random_state=1)
    learner = NaiveBayes()

    cnt = 0
    max_samples = 5000
    y_pred = array('i')
    X_batch = []
    y_batch = []
    y_proba = []
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y, classes=[0, 1])
        cnt += 1

    cloned = clone(learner)

    assert learner._observed_class_distribution != {} and cloned._observed_class_distribution == {}
def __init__(self, stable_estimator=NaiveBayes(), reactive_estimator=NaiveBayes(),
             window_size=12, threshold=0.2):
    super().__init__()
    # default values
    self.c = None
    self.stable_base_estimator = stable_estimator
    self.reactive_base_estimator = reactive_estimator
    self.stable_estimator = None
    self.reactive_estimator = None
    self.t = None
    self.classes = None
    self.w = window_size
    self.theta = math.floor(self.w * threshold)
    self.instances_X = None
    self.instances_y = None
    self.change_detected = None
    self.number_of_errors = None
    self.__configure()
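# A brief illustration of the threshold arithmetic in the constructor above:
# theta = floor(window_size * threshold). In a paired-learners scheme the
# reactive learner typically replaces the stable one once it wins on more than
# theta of the last window_size instances; only the formula itself comes from
# the code, the interpretation is a hedged reading.
import math

window_size, threshold = 12, 0.2
theta = math.floor(window_size * threshold)
assert theta == 2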
def test_accuracy():
    # An ensemble of Naive Bayes learners should score at least 85% accuracy
    # on 200 test instances drawn from SEAGenerator.
    chunk_size = 100
    n_samples_train = 1050
    n_samples_test = 200
    gen = SEAGenerator(noise_percentage=0.0)
    nb = NaiveBayes()
    mde = MDEMethod(nb, chunk_size, KNORAU())
    X_train, y_train = gen.next_sample(n_samples_train)
    X_test, y_test = gen.next_sample(n_samples_test)
    mde.partial_fit(X_train, y_train)
    assert mde.score(X_test, y_test) > 0.85
def test_online_csb2():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    stream.prepare_for_use()
    nb = NaiveBayes()
    learner = OnlineCSB2Classifier(base_estimator=nb, n_estimators=3, cost_positive=1,
                                   cost_negative=0.9, random_state=112)
    first = True
    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=stream.target_values)
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1

    performance = correct_predictions / len(predictions)
    expected_predictions = [1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0,
                            0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,
                            0, 1, 1, 0, 1, 0, 0, 1, 1]
    expected_correct_predictions = 43
    expected_performance = 0.8775510204081632

    assert np.all(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray

    expected_info = "OnlineCSB2Classifier(base_estimator=NaiveBayes(nominal_attributes=None), cost_negative=0.9, " \
                    "cost_positive=1, drift_detection=True, n_estimators=3, random_state=112)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info
def test_online_rus_1():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    stream.prepare_for_use()
    nb = NaiveBayes()
    learner = OnlineRUSBoost(base_estimator=nb, n_estimators=3, sampling_rate=5,
                             algorithm=1, random_state=112)
    first = True
    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=stream.target_values)
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1

    performance = correct_predictions / len(predictions)
    expected_predictions = [1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0,
                            0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                            0, 1, 0, 0, 1, 0, 0, 1, 1]
    expected_correct_predictions = 33
    expected_performance = 0.673469387755102

    assert np.all(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray

    expected_info = "OnlineRUSBoost(algorithm=1, base_estimator=NaiveBayes(nominal_attributes=None),\n" \
                    "               drift_detection=True, n_estimators=3, random_state=112,\n" \
                    "               sampling_rate=5)"
    assert learner.get_info() == expected_info
def test_awe():
    # prepare the stream
    stream = HyperplaneGenerator(random_state=1)
    stream.prepare_for_use()

    # prepare the ensemble
    classifier = AccuracyWeightedEnsemble(n_estimators=5, n_kept_estimators=10,
                                          base_estimator=NaiveBayes(),
                                          window_size=200, n_splits=5)

    # test the classifier
    max_samples = 5000
    cnt = 0
    wait_samples = 100
    predictions = array('i')
    correct = 0
    while cnt < max_samples:
        X, y = stream.next_sample()
        pred = classifier.predict(X)
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(int(pred[0]))
        classifier.partial_fit(X, y)
        cnt += 1
        if pred[0] == y:
            correct += 1

    # assert model predictions
    expected_predictions = array('i', [0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0,
                                       1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0,
                                       1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1])

    # assert model performance
    expected_accuracy = 0.875
    accuracy = correct / max_samples
    assert expected_accuracy == accuracy

    assert np.all(predictions == expected_predictions)

    # assert model information
    expected_info = "AccuracyWeightedEnsemble: n_estimators: 5 - " \
                    "n_kept_estimators: 10 - " \
                    "base_estimator: NaiveBayes: nominal attributes: [] - - " \
                    "window_size: 200 - " \
                    "n_splits: 5"
    assert classifier.get_info() == expected_info
def test_additive_expert_ensemble_weakest():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    stream.prepare_for_use()
    learner = AdditiveExpertEnsemble(3, NaiveBayes(), beta=0.5, gamma=0.1,
                                     pruning='weakest')
    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0
    first = True

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=stream.target_values)
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1

    performance = correct_predictions / len(predictions)
    expected_predictions = [1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0,
                            0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0,
                            0, 1, 1, 0, 1, 0, 0, 1, 1]
    expected_correct_predictions = 45
    expected_performance = 0.9183673469387755

    assert np.all(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray

    expected_info = "AdditiveExpertEnsemble(base_estimator=NaiveBayes(nominal_attributes=None),\n" \
                    "                       beta=0.5, gamma=0.1, n_estimators=3, pruning='weakest')"
    assert learner.get_info() == expected_info
def test_online_smote_bagging():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    nb = NaiveBayes()
    learner = OnlineSMOTEBaggingClassifier(base_estimator=nb, n_estimators=3,
                                           sampling_rate=2, random_state=112)
    first = True
    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=[0, 1])
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1

    performance = correct_predictions / len(predictions)
    expected_predictions = [1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1,
                            0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,
                            0, 1, 1, 0, 1, 0, 0, 1, 1]
    expected_correct_predictions = 42
    expected_performance = 0.8571428571428571

    assert np.all(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray

    expected_info = "OnlineSMOTEBaggingClassifier(base_estimator=NaiveBayes(nominal_attributes=None), " \
                    "drift_detection=True, n_estimators=3, random_state=None, sampling_rate=2)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info
def test_dynamic_weighted_majority():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    learner = DynamicWeightedMajorityClassifier(3, NaiveBayes(), beta=0.5, theta=0.01)
    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0
    first = True

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=stream.target_values)
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1

    performance = correct_predictions / len(predictions)
    expected_predictions = [1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0,
                            0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0,
                            0, 1, 1, 0, 1, 0, 0, 1, 1]
    expected_correct_predictions = 44
    expected_performance = 0.8979591836734694

    assert np.all(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray

    expected_info = 'DynamicWeightedMajorityClassifier(base_estimator=NaiveBayes(nominal_attributes=None),\n' \
                    '                                  beta=0.5, n_estimators=3, period=50,\n' \
                    '                                  theta=0.01)'
    assert learner.get_info() == expected_info
def test_online_rus_3():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    nb = NaiveBayes()
    learner = OnlineRUSBoostClassifier(base_estimator=nb, n_estimators=3,
                                       sampling_rate=5, algorithm=3, random_state=112)
    first = True
    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=[0, 1])
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1

    performance = correct_predictions / len(predictions)
    expected_predictions = [1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
                            1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                            0, 1, 1, 1, 1, 0, 1, 1, 1]
    expected_correct_predictions = 35
    expected_performance = 0.7142857142857143

    assert np.all(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray
def test_leverage_bagging_code_matrix():
    nb = NaiveBayes()
    # enable the output detection code matrix
    learner = LeveragingBaggingClassifier(base_estimator=nb, n_estimators=5,
                                          random_state=12, enable_code_matrix=True)

    y_expected = np.asarray([0, 0, 3, 2, 3, 1, 4, 1, 3, 4, 2, 4, 2, 2, 0, 0, 2,
                             4, 2, 4, 0, 4, 2, 4, 2, 4, 0, 4, 1, 3, 2, 1, 2, 4,
                             2, 4, 1, 3, 0, 4, 2, 0, 0, 4, 3, 2, 4, 4, 2, 4],
                            dtype=int)

    run_prequential_supervised(RandomTreeGenerator(tree_random_state=1,
                                                   sample_random_state=12,
                                                   n_classes=5),
                               learner, max_samples=2000, n_wait=40,
                               target_values=[0, 1, 2, 3, 4], y_expected=y_expected)
def test_leverage_bagging_me():
    nb = NaiveBayes()
    # leveraging_bag_me
    learner = LeveragingBaggingClassifier(base_estimator=nb, n_estimators=5,
                                          random_state=112,
                                          leverage_algorithm='leveraging_bag_me')

    y_expected = np.asarray([0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1,
                             1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1,
                             1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0],
                            dtype=int)

    run_prequential_supervised(ConceptDriftStreamGenerator(position=500, width=100,
                                                           random_state=112),
                               learner, max_samples=2000, n_wait=40,
                               target_values=[0, 1], y_expected=y_expected)
def test_online_adac2():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    stream.prepare_for_use()
    nb = NaiveBayes()
    learner = OnlineAdaC2(base_estimator=nb, n_estimators=3, random_state=112,
                          cost_positive=1, cost_negative=1)
    first = True
    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=stream.target_values)
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1

    performance = correct_predictions / len(predictions)
    expected_predictions = [1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0,
                            0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0,
                            0, 1, 1, 0, 1, 0, 0, 1, 1]
    expected_correct_predictions = 44
    expected_performance = 0.8979591836734694

    assert np.all(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray
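# The prequential loops in the tests above all repeat the same pattern; a
# hedged helper sketch that factors it out. The function name and signature
# are illustrative, not part of any library API.
def run_prequential(stream, learner, max_samples=5000, wait_samples=100, classes=None):
    predictions, correct = [], 0
    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every wait_samples samples, skipping the very first one.
        if cnt % wait_samples == 0 and cnt != 0:
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct += 1
        # Pass the class list only on the first partial_fit call.
        if cnt == 0 and classes is not None:
            learner.partial_fit(X, y, classes=classes)
        else:
            learner.partial_fit(X, y)
    return predictions, correct / len(predictions)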
def __init__(self, min_estimators=5, max_estimators=20,
             base_estimators=[NaiveBayes(), HoeffdingTreeClassifier()],
             period=1000, alpha=0.002, beta=1.5, theta=0.05, enable_diversity=True):
    """ Creates a new instance of DiversifiedDynamicClassWeightedClassifier. """
    super().__init__()

    self.enable_diversity = enable_diversity
    self.min_estimators = min_estimators
    self.max_estimators = max_estimators
    self.base_estimators = base_estimators

    self.alpha = alpha
    self.beta = beta
    self.theta = theta
    self.period = period

    self.p = -1
    self.n_estimators = max_estimators
    self.epochs = None
    self.num_classes = None
    self.experts = None
    self.div = []

    self.window_size = None
    self.X_batch = None
    self.y_batch = None
    self.y_batch_experts = None

    # custom measurements attributes
    self.custom_measurements = []
    self.custom_time = []

    self.reset()
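# A hedged instantiation sketch for the constructor above. The values mirror
# the defaults; the comments on parameter roles are assumptions for
# illustration, not documented semantics.
ddcw = DiversifiedDynamicClassWeightedClassifier(
    min_estimators=5,
    max_estimators=20,
    base_estimators=[NaiveBayes(), HoeffdingTreeClassifier()],
    period=1000,        # samples between expert-weight updates (assumed meaning)
    alpha=0.002,        # weight-decay factor (assumed meaning)
    beta=1.5,           # weight-update multiplier (assumed meaning)
    theta=0.05,         # pruning threshold on expert weights (assumed meaning)
    enable_diversity=True,
)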