def test_grid():
    clfs = [
        OzaBagging(base_estimator=KNN()),
        OzaBaggingAdwin(base_estimator=KNN()),
        AdaptiveRandomForest(),
        SAMKNN()
    ]
    cv = CrossValidation(clfs=clfs, max_samples=1000000, test_size=1)
    cv.streams = [
        ConceptDriftStream(
            stream=LEDGeneratorDrift(has_noise=False, noise_percentage=0.0,
                                     n_drift_features=3),
            drift_stream=LEDGeneratorDrift(has_noise=False,
                                           noise_percentage=0.0,
                                           n_drift_features=7),
            random_state=None,
            alpha=90.0,  # angle of change, in degrees (0-90)
            position=250000,
            width=1),  # width=1: abrupt drift
        ConceptDriftStream(
            stream=LEDGeneratorDrift(has_noise=False, noise_percentage=0.0,
                                     n_drift_features=3),
            drift_stream=LEDGeneratorDrift(has_noise=False,
                                           noise_percentage=0.0,
                                           n_drift_features=7),
            random_state=None,
            alpha=90.0,  # angle of change, in degrees (0-90)
            position=250000,
            width=50000)  # width=50000: gradual drift
    ]
    cv.test()
    cv.save_summary()
def evaluation2():
    # Array of classification algorithms to be tested
    classifiers = [
        OzaBagging(base_estimator=KNN()),
        OzaBaggingAdwin(base_estimator=KNN()),
        RSLVQ(prototypes_per_class=4, sigma=6),
        ARSLVQ(prototypes_per_class=4, sigma=6)
    ]
    cv = CrossValidation(clfs=classifiers, max_samples=1000000, test_size=1)
    # Initialize the stream generators of the scikit-multiflow package
    cv.streams = (cv.init_standard_streams() + cv.init_real_world()
                  + cv.init_reoccuring_streams())
    cv.test()
    cv.save_summary()
def __init__(self, base_estimator=KNN(), n_estimators=10, w=6, delta=0.002,
             enable_code_matrix=False, leverage_algorithm='leveraging_bag',
             random_state=None):
    super().__init__()
    # Default values
    self.ensemble = None
    self.adwin_ensemble = None
    self.n_detected_changes = None
    self.matrix_codes = None
    self.classes = None
    self.init_matrix_codes = None
    self.random_state = None
    self.base_estimator = base_estimator
    self._init_n_estimators = n_estimators
    self.enable_matrix_codes = enable_code_matrix
    self.w = w
    self.delta = delta
    if leverage_algorithm not in self.LEVERAGE_ALGORITHMS:
        raise ValueError("Leverage algorithm not supported.")
    self.leveraging_algorithm = leverage_algorithm
    self._init_random_state = random_state
    self.__configure()
def test_KNN(test_path, package_path):
    test_file = os.path.join(package_path,
                             'src/skmultiflow/data/datasets/sea_big.csv')
    stream = FileStream(test_file, -1, 1)
    stream.prepare_for_use()

    learner = KNN(n_neighbors=8, max_window_size=2000, leaf_size=40)

    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0,
                            0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0,
                            1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0,
                            0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0,
                            1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0]
    assert np.alltrue(predictions == expected_predictions)
def evaluation():
    # Array of classification algorithms to be tested
    classifiers = [
        GLVQ(prototypes_per_class=4),
        HoeffdingTree(),
        HAT(),
        KNN(),
        SAMKNN(),
        LeverageBagging(),
        KNNAdwin(max_window_size=1000)
    ]
    cv = CrossValidation(clfs=classifiers, max_samples=1000000, test_size=1)
    # Initialize the stream generators of the scikit-multiflow package
    cv.streams = (cv.init_standard_streams() + cv.init_real_world()
                  + cv.init_reoccuring_streams())
    cv.test()
    cv.save_summary()
from skmultiflow.data import FileStream
from skmultiflow.lazy.knn import KNN
from skmultiflow.evaluation import EvaluatePrequential

n_neighbors = 8
max_window_size = 2000
leaf_size = 30
n_estimators = 30
show_plot = True
pretrain_size = 100
max_samples = 7000
metrics = ['accuracy']

stream = FileStream('data/stream1.csv')
stream.prepare_for_use()

mdl = KNN(n_neighbors=n_neighbors, max_window_size=max_window_size,
          leaf_size=leaf_size)

evaluator = EvaluatePrequential(show_plot=show_plot,
                                pretrain_size=pretrain_size,
                                max_samples=max_samples,
                                metrics=metrics)
evaluator.evaluate(stream=stream, model=mdl)
def test_knn():
    stream = SEAGenerator(random_state=1)
    stream.prepare_for_use()

    learner = KNN(n_neighbors=8, max_window_size=2000, leaf_size=40)

    cnt = 0
    max_samples = 5000
    predictions = array('i')
    correct_predictions = 0
    wait_samples = 100
    X_batch = []
    y_batch = []

    while cnt < max_samples:
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1,
                                       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                       1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1,
                                       1, 1, 1, 0, 1, 0, 0, 1, 0, 1])
    assert np.alltrue(predictions == expected_predictions)

    expected_correct_predictions = 49
    assert correct_predictions == expected_correct_predictions

    expected_info = 'KNN: - n_neighbors: 8 - max_window_size: 2000 - leaf_size: 40'
    assert learner.get_info() == expected_info

    learner.reset()
    X_batch = np.array(X_batch)
    y_batch = np.array(y_batch)
    learner.fit(X_batch[:4500], y_batch[:4500], classes=[0, 1])
    predictions = learner.predict(X_batch[4501:4550])

    expected_predictions = array('i', [1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
                                       1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1,
                                       0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0,
                                       1, 1, 1, 1, 1, 1, 1, 0, 1, 0])
    assert np.alltrue(predictions == expected_predictions)

    correct_predictions = sum(predictions == y_batch[4501:4550])
    expected_correct_predictions = 49
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray
def test_reoccuring(self):
    s1 = MIXEDGenerator(classification_function=1, random_state=112,
                        balance_classes=False)
    s2 = MIXEDGenerator(classification_function=0, random_state=112,
                        balance_classes=False)

    stream = ReoccuringDriftStream(stream=s1,
                                   drift_stream=s2,
                                   random_state=None,
                                   alpha=90.0,  # angle of change, in degrees (0-90)
                                   position=2000,
                                   width=500)
    stream.prepare_for_use()

    evaluator = EvaluatePrequential(show_plot=False, batch_size=10,
                                    max_samples=1000,
                                    metrics=['accuracy', 'kappa_t',
                                             'kappa_m', 'kappa'],
                                    output_file=None)
    result = evaluator.evaluate(stream=stream,
                                model=OzaBaggingAdwin(base_estimator=KNN()))
    measurements = np.asarray(evaluator.get_measurements()[0])[0]

    self.assertIsNotNone(result)
    self.assertTrue(measurements.get_accuracy() >= 0.6,
                    msg='Accuracy was {} but has to be greater than 0.6'
                        .format(measurements.get_accuracy()))
def hyperparametertuning_classifiers(learn, X, y, knn_max_w_size):
    cl_name = learn.__class__.__name__
    scor = 'balanced_accuracy'
    cv = 10

    if cl_name == 'KNN':
        KNN_grid = {
            'n_neighbors': [3, 5, 7, 10, 15],
            'leaf_size': [3, 5, 7, 10, 15],
            'algorithm': ['kd_tree']
        }
        # Tune the batch kNN (sklearn), then transfer the best parameters
        # to the streaming KNN.
        grid_cv_KNN = GridSearchCV(estimator=KNeighborsClassifier(), cv=cv,
                                   scoring=scor, param_grid=KNN_grid)
        # Alternative: RandomizedSearchCV(estimator=KNeighborsClassifier(),
        #                                 cv=cv, scoring=scor,
        #                                 param_distributions=KNN_grid)
        grid_cv_KNN.fit(X.to_numpy(), y.to_numpy().ravel())

        tuned_params = {
            'n_neighbors': grid_cv_KNN.best_params_['n_neighbors'],
            'leaf_size': grid_cv_KNN.best_params_['leaf_size'],
            'max_window_size': knn_max_w_size
        }
        tuned_learn = KNN()
        tuned_learn.set_params(**tuned_params)
        tuned_learn.fit(X.to_numpy(), y.to_numpy().ravel())

    elif cl_name == 'HoeffdingTree':
        grace_period_range = np.array([25, 75, 150, 300])
        tie_threshold_range = np.linspace(0.001, 1.0, 5)
        split_confidence_range = np.linspace(0.000000001, 0.1, 5)
        split_criterion_range = ['gini', 'info_gain', 'hellinger']
        leaf_prediction_range = ['mc', 'nb', 'nba']
        HT_grid = {
            'grace_period': grace_period_range,
            'tie_threshold': tie_threshold_range,
            'split_confidence': split_confidence_range,
            'split_criterion': split_criterion_range,
            'leaf_prediction': leaf_prediction_range
        }
        grid_cv_HT = GridSearchCV(estimator=learn, scoring=scor, cv=cv,
                                  param_grid=HT_grid)
        # Alternative: RandomizedSearchCV(estimator=learn, scoring=scor,
        #                                 cv=cv, param_distributions=HT_grid)
        grid_cv_HT.fit(X.to_numpy(), y.to_numpy().ravel())
        tuned_params = grid_cv_HT.best_params_
        tuned_learn = grid_cv_HT.best_estimator_

    elif cl_name == 'NaiveBayes':
        tuned_params = {'nominal_attributes': None}
        tuned_learn = NaiveBayes()
        tuned_learn.set_params(**tuned_params)
        tuned_learn.fit(X.to_numpy(), y.to_numpy().ravel())

    else:
        raise ValueError('Unsupported classifier: {}'.format(cl_name))

    return tuned_learn, tuned_params
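# A hedged usage sketch for hyperparametertuning_classifiers. The CSV path and
# the feature/label split below are illustrative assumptions, not part of the
# source; any pandas DataFrame of features X and label column y will do.
import pandas as pd

data = pd.read_csv('data/stream1.csv')         # hypothetical file
X, y = data.iloc[:, :-1], data.iloc[:, -1:]    # features / label
tuned_model, tuned_params = hyperparametertuning_classifiers(
    HoeffdingTree(), X, y, knn_max_w_size=1000)
print(tuned_params)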
labels.columns = ['class']
n_samples = XT.shape[0] - preparatory_size

######################## CURIE ###################
lst_dim = [n_bins] * n_feats
curie = CA_VonNeumann_Classifier(bins=[], bins_margin=bins_margin,
                                 dimensions=lst_dim, cells=empties(lst_dim))
limits_automata = list(np.zeros(1))
# ca_names = ['CURIE']
mutants_time = empty_mutant(curie.dimensions)

######################## LEARNERS ###################
learners_ref = [HoeffdingTree(), KNN(), NaiveBayes()]

######################## DETECTORS ###################
detectores_ref = [DDM(), EDDM(), ADWIN(), PageHinkley(), curie]

n_pasos = len(datasets) * len(tipos) * len(learners_ref) * len(detectores_ref)

SCORES_LER = []
TIMES_LER = []
RAMS_LER = []
DETECTIONS_LER = []

for ler in range(len(learners_ref)):
    learner = deepcopy(learners_ref[ler])
def cargaClassifiers(params, n_classes):
    gamma = params[0][0]
    n_gaussianRF = params[0][1]
    window_size = params[1][0]
    vecinos = params[1][1]    # n_neighbors
    hoja_size = params[1][2]  # leaf_size

    # KNN and GRF_KNN
    clf_1 = KNN(n_neighbors=vecinos, leaf_size=hoja_size,
                max_window_size=window_size)
    clf_2 = GRF_KNN(n_neighbors=vecinos, leaf_size=hoja_size,
                    max_window_size=window_size)
    clf_2.gamma = gamma
    clf_2.n_gaussianRF = n_gaussianRF

    # HoeffdingTree and GRF_HoeffdingTree
    clf_3 = HoeffdingTree()
    clf_4 = GRF_HoeffdingTree()
    clf_4.gamma = gamma
    clf_4.n_gaussianRF = n_gaussianRF

    # HoeffdingAdaptiveTree and GRF_HoeffdingAdaptiveTree
    clf_5 = HAT()
    clf_6 = GRF_HoeffdingAdaptiveTree()
    clf_6.gamma = gamma
    clf_6.n_gaussianRF = n_gaussianRF

    # NaiveBayes and GRF_NaiveBayes are excluded
    # clf_7 = NaiveBayes()
    # clf_8 = GRF_NaiveBayes()
    # clf_8.gamma = gamma
    # clf_8.n_gaussianRF = n_gaussianRF

    # GaussianNB and GRF_GaussianNB
    clf_9 = GaussianNB()
    clf_10 = GRF_GaussianNB()
    clf_10.gamma = gamma
    clf_10.n_gaussianRF = n_gaussianRF

    # SGDClassifier and GRF_SGDClassifier
    clf_11 = SGDClassifier(max_iter=1)
    clf_12 = GRF_SGDClassifier(max_iter=1)
    clf_12.gamma = gamma
    clf_12.n_gaussianRF = n_gaussianRF

    # Perceptron and GRF_Perceptron (SGD with perceptron loss)
    clf_13 = SGDClassifier(loss='perceptron', eta0=1,
                           learning_rate='constant', penalty=None, max_iter=1)
    clf_14 = GRF_SGDClassifier(loss='perceptron', eta0=1,
                               learning_rate='constant', penalty=None,
                               max_iter=1)
    clf_14.gamma = gamma
    clf_14.n_gaussianRF = n_gaussianRF

    # PassiveAggressiveClassifier and GRF_PassiveAggressiveClassifier
    clf_15 = PassiveAggressiveClassifier(max_iter=1)
    clf_16 = GRF_PassiveAggressiveClassifier(max_iter=1)
    clf_16.gamma = gamma
    clf_16.n_gaussianRF = n_gaussianRF

    # MLPClassifier and GRF_MLPClassifier
    clf_17 = MLPClassifier(batch_size=1, max_iter=1, hidden_layer_sizes=(100,))
    clf_18 = GRF_MLPClassifier(batch_size=1, max_iter=1,
                               hidden_layer_sizes=(100,))
    clf_18.gamma = gamma
    clf_18.n_gaussianRF = n_gaussianRF

    classifiers = [clf_1, clf_2, clf_3, clf_4, clf_5, clf_6, clf_9, clf_10,
                   clf_11, clf_12, clf_13, clf_14, clf_15, clf_16, clf_17,
                   clf_18]
    classifiers_init = list(classifiers)

    # Derive a short display name from each classifier's class name instead
    # of slicing its repr at fixed offsets, which breaks when the repr changes.
    short_names = {
        'KNN': 'KNN', 'GRF_KNN': 'GRF_KNN',
        'HoeffdingTree': 'HT', 'GRF_HoeffdingTree': 'GRF_HT',
        'HAT': 'HAT', 'GRF_HoeffdingAdaptiveTree': 'GRF_HAT',
        'GaussianNB': 'GNB', 'GRF_GaussianNB': 'GRF_GNB',
        'PassiveAggressiveClassifier': 'PA',
        'GRF_PassiveAggressiveClassifier': 'GRF_PA',
        'MLPClassifier': 'MLP', 'GRF_MLPClassifier': 'GRF_MLP',
    }
    names = []
    for classifier in classifiers:
        cls = type(classifier).__name__
        if cls == 'SGDClassifier':
            names.append('Perceptron' if classifier.loss == 'perceptron'
                         else 'SGD')
        elif cls == 'GRF_SGDClassifier':
            names.append('GRF_Perceptron' if classifier.loss == 'perceptron'
                         else 'GRF_SGD')
        else:
            names.append(short_names.get(cls, cls))

    return classifiers, names, classifiers_init
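# Hedged usage sketch for cargaClassifiers. It unpacks params as
# params[0] = (gamma, n_gaussianRF) and params[1] = (window_size, n_neighbors,
# leaf_size); the concrete values below are illustrative assumptions.
params = [(0.5, 10), (1000, 5, 30)]
classifiers, names, classifiers_init = cargaClassifiers(params, n_classes=2)
print(names)  # e.g. ['KNN', 'GRF_KNN', 'HT', 'GRF_HT', 'HAT', 'GRF_HAT', ...]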
def test_grid():
    clfs = [
        RRSLVQ(prototypes_per_class=4, sigma=8),
        RSLVQ(prototypes_per_class=4, sigma=8),
        HAT(),
        OzaBaggingAdwin(base_estimator=KNN()),
        AdaptiveRandomForest(),
        SAMKNN()
    ]
    cv = CrossValidation(clfs=clfs, max_samples=1000000, test_size=1)
    cv.streams = cv.init_reoccuring_streams()
    cv.test()
    cv.save_summary()