Beispiel #1
0
def test_grid():
    """Cross-validate four stream classifiers on two LED concept-drift streams.

    Both streams switch from an LED generator with 3 drifting features to one
    with 7 drifting features at position 250000; they differ only in the
    ``width`` of the drift (1 versus 50000). The run is executed with
    ``cv.test()`` and its results are written with ``cv.save_summary()``.
    """

    def led_drift_stream(width):
        # Noise-free LED generators; only the number of drifting features
        # differs between the base concept and the drift concept.
        before = LEDGeneratorDrift(has_noise=False,
                                   noise_percentage=0.0,
                                   n_drift_features=3)
        after = LEDGeneratorDrift(has_noise=False,
                                  noise_percentage=0.0,
                                  n_drift_features=7)
        return ConceptDriftStream(
            stream=before,
            drift_stream=after,
            random_state=None,
            alpha=90.0,  # angle of change grade 0 - 90
            position=250000,
            width=width)

    learners = [
        OzaBagging(base_estimator=KNN()),
        OzaBaggingAdwin(base_estimator=KNN()),
        AdaptiveRandomForest(),
        SAMKNN(),
    ]
    cv = CrossValidation(clfs=learners, max_samples=1000000, test_size=1)
    cv.streams = [led_drift_stream(drift_width) for drift_width in (1, 50000)]
    cv.test()
    cv.save_summary()
Beispiel #2
0
def evaluation2():
    """Cross-validate bagging and RSLVQ classifiers on all configured streams.

    The streams are the concatenation of the standard, real-world and
    reoccurring stream sets provided by the ``CrossValidation`` instance.
    Results are written with ``cv.save_summary()``.
    """
    # Classification algorithms that are to be tested.
    learners = [
        OzaBagging(base_estimator=KNN()),
        OzaBaggingAdwin(base_estimator=KNN()),
        RSLVQ(prototypes_per_class=4, sigma=6),
        ARSLVQ(prototypes_per_class=4, sigma=6),
    ]
    cv = CrossValidation(clfs=learners, max_samples=1000000, test_size=1)

    # Initialise the stream generators of the scikit-multiflow package.
    standard_streams = cv.init_standard_streams()
    real_world_streams = cv.init_real_world()
    reoccurring_streams = cv.init_reoccuring_streams()
    cv.streams = standard_streams + real_world_streams + reoccurring_streams

    cv.test()
    cv.save_summary()
Beispiel #3
0
    def __init__(self,
                 base_estimator=KNN(),
                 n_estimators=10,
                 w=6,
                 delta=0.002,
                 enable_code_matrix=False,
                 leverage_algorithm='leveraging_bag',
                 random_state=None):
        """Store the constructor arguments and configure the instance.

        Parameters
        ----------
        base_estimator
            Estimator stored as ``self.base_estimator``. NOTE: the default
            ``KNN()`` is evaluated once, when the method is defined, so every
            instance created without this argument receives the same object.
        n_estimators
            Stored as ``self._init_n_estimators``.
        w
            Stored as ``self.w``.
        delta
            Stored as ``self.delta``.
        enable_code_matrix
            Stored as ``self.enable_matrix_codes``.
        leverage_algorithm
            Must be a member of ``self.LEVERAGE_ALGORITHMS``.
        random_state
            Stored as ``self._init_random_state``.

        Raises
        ------
        ValueError
            If ``leverage_algorithm`` is not in ``self.LEVERAGE_ALGORITHMS``.
        """
        super().__init__()

        # Placeholders, all starting out as None.
        # presumably populated by __configure() or later training calls --
        # TODO confirm against the rest of the class.
        self.ensemble = self.adwin_ensemble = None
        self.n_detected_changes = None
        self.matrix_codes = self.init_matrix_codes = None
        self.classes = None
        self.random_state = None

        # Constructor arguments.
        self.base_estimator = base_estimator
        self._init_n_estimators = n_estimators
        self.enable_matrix_codes = enable_code_matrix
        self.w, self.delta = w, delta

        if leverage_algorithm not in self.LEVERAGE_ALGORITHMS:
            raise ValueError("Leverage algorithm not supported.")
        self.leveraging_algorithm = leverage_algorithm

        self._init_random_state = random_state
        self.__configure()
def test_KNN(test_path, package_path):
    """Check KNN predictions on the ``sea_big.csv`` file stream.

    The learner is trained incrementally on 5000 samples; before training on
    every 100th sample (except the very first one) its prediction for that
    sample is recorded and finally compared with the stored reference values.

    Parameters
    ----------
    test_path
        Not used by this test.
    package_path
        Root directory under which ``src/skmultiflow/data/datasets`` is found.
    """
    test_file = os.path.join(package_path, 'src/skmultiflow/data/datasets/sea_big.csv')
    stream = FileStream(test_file, -1, 1)
    stream.prepare_for_use()

    learner = KNN(n_neighbors=8, max_window_size=2000, leaf_size=40)
    max_samples = 5000
    wait_samples = 100
    predictions = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples (predict first, then learn from the sample)
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0,
                            0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0,
                            1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0]

    # np.alltrue was removed in NumPy 2.0; array_equal compares shape and
    # values and works on every NumPy version.
    assert np.array_equal(predictions, expected_predictions)
Beispiel #5
0
def evaluation():
    """Cross-validate a set of stream classifiers on all configured streams.

    The streams are the concatenation of the standard, real-world and
    reoccurring stream sets provided by the ``CrossValidation`` instance.
    Results are written with ``cv.save_summary()``.
    """
    # Classification algorithms that are to be tested.
    learners = [
        GLVQ(prototypes_per_class=4),
        HoeffdingTree(),
        HAT(),
        KNN(),
        SAMKNN(),
        LeverageBagging(),
        KNNAdwin(max_window_size=1000),
    ]
    cv = CrossValidation(clfs=learners, max_samples=1000000, test_size=1)

    # Initialise the stream generators of the scikit-multiflow package.
    standard_streams = cv.init_standard_streams()
    real_world_streams = cv.init_real_world()
    reoccurring_streams = cv.init_reoccuring_streams()
    cv.streams = standard_streams + real_world_streams + reoccurring_streams

    cv.test()
    cv.save_summary()
Beispiel #6
0
from skmultiflow.data import FileStream
from skmultiflow.lazy.knn import KNN
from skmultiflow.evaluation import EvaluatePrequential

# Script: evaluate a KNN model on a CSV-backed stream with EvaluatePrequential.

# Hyperparameters passed to the KNN model below.
n_neighbors = 8
max_window_size = 2000
leaf_size = 30
# NOTE(review): n_estimators is assigned but not used anywhere in the visible
# script.
n_estimators = 30
# Settings forwarded to EvaluatePrequential.
show_plot = True
pretrain_size = 100
max_samples = 7000
metrics = ['accuracy']

# Open the data file as a stream and prepare it before it is evaluated.
stream = FileStream('data/stream1.csv')
stream.prepare_for_use()
mdl = KNN(n_neighbors=n_neighbors,
          max_window_size=max_window_size,
          leaf_size=leaf_size)
evaluator = EvaluatePrequential(show_plot=show_plot,
                                pretrain_size=pretrain_size,
                                max_samples=max_samples,
                                metrics=metrics)
# Run the evaluation of the KNN model on the stream.
evaluator.evaluate(stream=stream, model=mdl)
Beispiel #7
0
def test_knn():
    """Check KNN incremental and batch behaviour on a seeded SEA stream.

    Part 1 trains the learner sample by sample (5000 samples) and records its
    prediction for every 100th sample before learning from it. Part 2 resets
    the learner, fits it on the first 4500 collected samples in one call and
    predicts samples 4501..4549. Both prediction sequences, the number of
    correct predictions, the ``get_info()`` string and the return types of
    ``predict`` / ``predict_proba`` are compared with stored references.
    """
    stream = SEAGenerator(random_state=1)
    stream.prepare_for_use()

    learner = KNN(n_neighbors=8, max_window_size=2000, leaf_size=40)
    max_samples = 5000
    wait_samples = 100
    predictions = array('i')
    correct_predictions = 0
    X_batch = []
    y_batch = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
        # Test every n samples (predict first, then learn from the sample)
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        learner.partial_fit(X, y)

    expected_predictions = array('i', [
        1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0,
        1
    ])
    # np.alltrue was removed in NumPy 2.0; array_equal compares shape and
    # values and works on every NumPy version.
    assert np.array_equal(predictions, expected_predictions)

    expected_correct_predictions = 49
    assert correct_predictions == expected_correct_predictions

    expected_info = 'KNN: - n_neighbors: 8 - max_window_size: 2000 - leaf_size: 40'
    assert learner.get_info() == expected_info

    # Batch mode: start from a clean learner and fit on the collected samples.
    learner.reset()
    X_batch = np.array(X_batch)
    y_batch = np.array(y_batch)
    learner.fit(X_batch[:4500], y_batch[:4500], classes=[0, 1])
    predictions = learner.predict(X_batch[4501:4550])

    expected_predictions = array('i', [
        1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1,
        1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1,
        0
    ])
    assert np.array_equal(predictions, expected_predictions)

    correct_predictions = sum(predictions == y_batch[4501:4550])
    expected_correct_predictions = 49
    assert correct_predictions == expected_correct_predictions

    # X is the last sample drawn in the loop above.
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)
Beispiel #8
0
    def test_reoccuring(self):
        """Evaluate OzaBaggingAdwin(KNN) on a reoccurring MIXED drift stream.

        The test passes when ``evaluate`` returns something other than None
        and the accuracy of the first measurement object is at least 0.6.
        """
        first_concept = MIXEDGenerator(classification_function=1,
                                       random_state=112,
                                       balance_classes=False)
        second_concept = MIXEDGenerator(classification_function=0,
                                        random_state=112,
                                        balance_classes=False)
        stream = ReoccuringDriftStream(
            stream=first_concept,
            drift_stream=second_concept,
            random_state=None,
            alpha=90.0,  # angle of change grade 0 - 90
            position=2000,
            width=500)
        stream.prepare_for_use()

        tracked_metrics = ['accuracy', 'kappa_t', 'kappa_m', 'kappa']
        evaluator = EvaluatePrequential(show_plot=False,
                                        batch_size=10,
                                        max_samples=1000,
                                        metrics=tracked_metrics,
                                        output_file=None)
        evaluated = evaluator.evaluate(
            stream=stream, model=OzaBaggingAdwin(base_estimator=KNN()))

        # First entry of the first element returned by get_measurements().
        measurements = np.asarray(evaluator.get_measurements()[0])[0]

        self.assertIsNotNone(evaluated)
        self.assertTrue(
            measurements.get_accuracy() >= 0.6,
            msg='Accuracy was {} but has to be greater than 0.6'.format(
                measurements.get_accuracy()))
Beispiel #9
0
def hyperparametertuning_classifiers(learn, X, y, knn_max_w_size):
    """Tune and fit a learner, dispatching on the learner's class name.

    * ``KNN``: ``n_neighbors`` and ``leaf_size`` are chosen by a grid search
      over scikit-learn's ``KNeighborsClassifier``; a fresh ``KNN`` is then
      configured with them plus ``max_window_size=knn_max_w_size`` and fitted.
    * ``HoeffdingTree``: a grid search is run directly on ``learn``; the best
      estimator and its parameters are returned.
    * ``NaiveBayes``: nothing is searched; a fresh ``NaiveBayes`` with
      ``nominal_attributes=None`` is fitted.

    All grid searches use 10-fold cross-validation scored by balanced
    accuracy.

    Parameters
    ----------
    learn
        Learner instance; only its class name (and, for ``HoeffdingTree``,
        the instance itself) is used.
    X, y
        Training features and targets. Must expose ``.values`` (e.g. pandas
        objects); ``y`` is flattened with ``ravel()``.
    knn_max_w_size
        ``max_window_size`` given to the tuned ``KNN``.

    Returns
    -------
    tuple
        ``(tuned_learn, tuned_params)``.

    Raises
    ------
    ValueError
        If the class name of ``learn`` is none of the supported ones.
        (Previously this case surfaced as an ``UnboundLocalError``.)
    """
    cl_name = learn.__class__.__name__

    supported = ('KNN', 'HoeffdingTree', 'NaiveBayes')
    if cl_name not in supported:
        raise ValueError(
            "Unsupported learner '{}'; expected one of {}.".format(
                cl_name, ', '.join(supported)))

    scor = 'balanced_accuracy'
    cv = 10

    # DataFrame.as_matrix() was removed in pandas 1.0; `.values` yields the
    # same ndarray and exists in old and new pandas versions alike.
    features = X.values
    targets = y.values.ravel()

    if cl_name == 'KNN':
        KNN_grid = {
            'n_neighbors': [3, 5, 7, 10, 15],
            'leaf_size': [3, 5, 7, 10, 15],
            'algorithm': ['kd_tree']
        }
        grid_cv_KNN = GridSearchCV(estimator=KNeighborsClassifier(),
                                   cv=cv,
                                   scoring=scor,
                                   param_grid=KNN_grid)
        grid_cv_KNN.fit(features, targets)

        tuned_params = {
            'n_neighbors': grid_cv_KNN.best_params_['n_neighbors'],
            'leaf_size': grid_cv_KNN.best_params_['leaf_size'],
            'max_window_size': knn_max_w_size
        }
        tuned_learn = KNN()
        tuned_learn.set_params(**tuned_params)
        tuned_learn.fit(features, targets)

    elif cl_name == 'HoeffdingTree':
        HT_grid = {
            'grace_period': np.array([25, 75, 150, 300]),
            'tie_threshold': np.linspace(0.001, 1.0, 5),
            'split_confidence': np.linspace(0.000000001, 0.1, 5),
            'split_criterion': ['gini', 'info_gain', 'hellinger'],
            'leaf_prediction': ['mc', 'nb', 'nba']
        }
        grid_cv_HT = GridSearchCV(estimator=learn,
                                  scoring=scor,
                                  cv=cv,
                                  param_grid=HT_grid)
        grid_cv_HT.fit(features, targets)

        tuned_params = grid_cv_HT.best_params_
        tuned_learn = grid_cv_HT.best_estimator_

    else:  # cl_name == 'NaiveBayes'
        tuned_params = {'nominal_attributes': None}
        tuned_learn = NaiveBayes()
        tuned_learn.set_params(**tuned_params)
        tuned_learn.fit(features, targets)

    return tuned_learn, tuned_params
Beispiel #10
0
        labels.columns = ['class']
        n_samples = XT.shape[0] - preparatory_size

        ######################## CURIE ###################

        lst_dim = [n_bins] * n_feats
        curie = CA_VonNeumann_Classifier(bins=[],
                                         bins_margin=bins_margin,
                                         dimensions=lst_dim,
                                         cells=empties(lst_dim))
        limits_automata = list(np.zeros(1))
        #ca_names=['CURIE']
        mutants_time = empty_mutant(curie.dimensions)

        ######################## LEARNERS ###################
        learners_ref = [HoeffdingTree(), KNN(), NaiveBayes()]
        ######################## DETECTORS ###################
        detectores_ref = [DDM(), EDDM(), ADWIN(), PageHinkley(), curie]

        n_pasos = len(datasets) * len(tipos) * len(learners_ref) * len(
            detectores_ref)

        SCORES_LER = []
        TIMES_LER = []
        RAMS_LER = []
        DETECTIONS_LER = []

        for ler in range(len(learners_ref)):

            learner = deepcopy(learners_ref[ler])
def cargaClassifiers(params, n_classes):
    """Build the baseline classifiers and their GRF_* counterparts.

    Every baseline classifier is followed by its ``GRF_*`` variant, which
    additionally gets the attributes ``gamma`` and ``n_gaussianRF`` set.

    Parameters
    ----------
    params
        Indexable of two indexables:
        ``params[0] = (gamma, n_gaussianRF)`` and
        ``params[1] = (max_window_size, n_neighbors, leaf_size)``; the second
        group configures ``KNN`` and ``GRF_KNN``.
    n_classes
        Not used by this function; kept for interface compatibility.

    Returns
    -------
    tuple
        ``(classifiers, names, classifiers_init)``. ``classifiers`` and
        ``classifiers_init`` are two distinct lists holding the same
        instances; ``names`` holds one short display name per classifier
        (``''`` when the classifier is not recognised).
    """
    gamma = params[0][0]
    n_gaussianRF = params[0][1]
    window_size = params[1][0]
    vecinos = params[1][1]
    hoja_size = params[1][2]

    def with_grf(clf):
        # Attach the two GRF settings shared by every GRF_* variant.
        clf.gamma = gamma
        clf.n_gaussianRF = n_gaussianRF
        return clf

    knn_kwargs = dict(n_neighbors=vecinos,
                      leaf_size=hoja_size,
                      max_window_size=window_size)
    perceptron_kwargs = dict(loss='perceptron', eta0=1,
                             learning_rate='constant', penalty=None,
                             max_iter=1)
    mlp_kwargs = dict(batch_size=1, max_iter=1, hidden_layer_sizes=(100,))

    # NOTE: the NaiveBayes / GRF_NaiveBayes pair ('MNB' / 'GRF_MNB') is
    # currently disabled and therefore not part of the list.
    classifiers = [
        # KNN and GRF_KNN
        KNN(**knn_kwargs),
        with_grf(GRF_KNN(**knn_kwargs)),
        # HoeffdingTree and GRF_HoeffdingTree
        HoeffdingTree(),
        with_grf(GRF_HoeffdingTree()),
        # HoeffdingAdaptiveTree and GRF_HoeffdingAdaptiveTree
        HAT(),
        with_grf(GRF_HoeffdingAdaptiveTree()),
        # GNB and GRF_GNB
        GaussianNB(),
        with_grf(GRF_GaussianNB()),
        # SGDClassifier and GRF_SGDClassifier
        SGDClassifier(max_iter=1),
        with_grf(GRF_SGDClassifier(max_iter=1)),
        # Perceptron and GRF_Perceptron
        SGDClassifier(**perceptron_kwargs),
        with_grf(GRF_SGDClassifier(**perceptron_kwargs)),
        # PassiveAggressiveClassifier and GRF_PassiveAggressiveClassifier
        PassiveAggressiveClassifier(max_iter=1),
        with_grf(GRF_PassiveAggressiveClassifier(max_iter=1)),
        # MLPClassifier and GRF_MLPClassifier
        MLPClassifier(**mlp_kwargs),
        with_grf(GRF_MLPClassifier(**mlp_kwargs)),
    ]
    classifiers_init = list(classifiers)

    # Display names are looked up by class name. Slicing str(classifier) at
    # fixed character offsets breaks silently whenever a module path or the
    # repr format changes.
    short_names = {
        'GRF_KNN': 'GRF_KNN',
        'KNN': 'KNN',
        'HoeffdingTree': 'HT',
        'GRF_HoeffdingTree': 'GRF_HT',
        'HAT': 'HAT',
        'GRF_HoeffdingAdaptiveTree': 'GRF_HAT',
        'GaussianNB': 'GNB',
        'GRF_GaussianNB': 'GRF_GNB',
        'PassiveAggressiveClassifier': 'PA',
        'GRF_PassiveAggressiveClassifier': 'GRF_PA',
        'MLPClassifier': 'MLP',
        'GRF_MLPClassifier': 'GRF_MLP',
    }
    # The SGD-based models are told apart by their loss function.
    sgd_names = {
        ('SGDClassifier', 'hinge'): 'SGD',
        ('GRF_SGDClassifier', 'hinge'): 'GRF_SGD',
        ('SGDClassifier', 'perceptron'): 'Perceptron',
        ('GRF_SGDClassifier', 'perceptron'): 'GRF_Perceptron',
    }

    names = []
    for clf in classifiers:
        cls_name = type(clf).__name__
        if cls_name in ('SGDClassifier', 'GRF_SGDClassifier'):
            names.append(sgd_names.get((cls_name, clf.loss), ''))
        else:
            names.append(short_names.get(cls_name, ''))

    return classifiers, names, classifiers_init
def test_grid():
    """Cross-validate six stream classifiers on the reoccurring drift streams.

    The streams come from ``cv.init_reoccuring_streams()``; results are
    written with ``cv.save_summary()`` and a marker line is printed at the end.
    """
    lvq_settings = dict(prototypes_per_class=4, sigma=8)
    learners = [
        RRSLVQ(**lvq_settings),
        RSLVQ(**lvq_settings),
        HAT(),
        OzaBaggingAdwin(base_estimator=KNN()),
        AdaptiveRandomForest(),
        SAMKNN(),
    ]
    cv = CrossValidation(clfs=learners, max_samples=1000000, test_size=1)
    cv.streams = cv.init_reoccuring_streams()
    cv.test()
    cv.save_summary()
    print("here")