Example 1
# Imports assume the scikit-multiflow 0.4.x API that these snippets use.
import os
from array import array

import numpy as np
from skmultiflow.data import FileStream
from skmultiflow.lazy import SAMKNN


def test_sam_knn(package_path):

    test_file = os.path.join(package_path, 'src/skmultiflow/data/datasets/sea_big.csv')

    stream = FileStream(test_file)
    stream.prepare_for_use()

    hyperParams = {'maxSize': 1000, 'nNeighbours': 5, 'knnWeights': 'distance', 'STMSizeAdaption': 'maxACCApprox',
                   'useLTM': False}

    learner = SAMKNN(n_neighbors=hyperParams['nNeighbours'], max_window_size=hyperParams['maxSize'],
                     weighting=hyperParams['knnWeights'],
                     stm_size_option=hyperParams['STMSizeAdaption'], use_ltm=hyperParams['useLTM'])

    cnt = 0
    max_samples = 5000
    predictions = array('d')

    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('d', [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0,
                                       0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0,
                                       1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0,
                                       0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0,
                                       0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0])

    assert np.all(predictions == expected_predictions)
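
The loop above interleaves prediction and training by hand (prequential, i.e.
test-then-train, evaluation). scikit-multiflow ships the same protocol as
EvaluatePrequential; a minimal sketch of an equivalent run, with SEAGenerator
standing in for the sea_big.csv fixture so it needs no test data:

from skmultiflow.data import SEAGenerator
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.lazy import SAMKNN

stream = SEAGenerator(random_state=1)
stream.prepare_for_use()

learner = SAMKNN(n_neighbors=5, max_window_size=1000, weighting='distance',
                 stm_size_option='maxACCApprox', use_ltm=False)

# Pretrain on the first 100 samples, then test-then-train up to 5000.
evaluator = EvaluatePrequential(max_samples=5000, pretrain_size=100,
                                metrics=['accuracy'])
evaluator.evaluate(stream=stream, model=learner, model_names=['SAMKNN'])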
Example 2
from skmultiflow.data import ConceptDriftStream, LEDGeneratorDrift
from skmultiflow.lazy import KNN, SAMKNN
from skmultiflow.meta import AdaptiveRandomForest, OzaBagging, OzaBaggingAdwin

# CrossValidation is not part of scikit-multiflow; it comes from the authors'
# own evaluation package (import path not shown in the source).


def test_grid():
    clfs = [
        OzaBagging(base_estimator=KNN()),
        OzaBaggingAdwin(base_estimator=KNN()),
        AdaptiveRandomForest(),
        SAMKNN()
    ]
    cv = CrossValidation(clfs=clfs, max_samples=1000000, test_size=1)
    cv.streams = [
        ConceptDriftStream(
            stream=LEDGeneratorDrift(has_noise=False,
                                     noise_percentage=0.0,
                                     n_drift_features=3),
            drift_stream=LEDGeneratorDrift(has_noise=False,
                                           noise_percentage=0.0,
                                           n_drift_features=7),
            random_state=None,
            alpha=90.0,  # angle of change, in degrees (0-90)
            position=250000,
            width=1),
        ConceptDriftStream(
            stream=LEDGeneratorDrift(has_noise=False,
                                     noise_percentage=0.0,
                                     n_drift_features=3),
            drift_stream=LEDGeneratorDrift(has_noise=False,
                                           noise_percentage=0.0,
                                           n_drift_features=7),
            random_state=None,
            alpha=90.0,  # angle of change, in degrees (0-90)
            position=250000,
            width=50000)
    ]
    cv.test()
    cv.save_summary()
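
The two streams above differ only in width: ConceptDriftStream draws from
drift_stream with a probability that follows a sigmoid centred on position,
so width=1 gives an essentially abrupt drift while width=50000 stretches the
transition over tens of thousands of samples. A small sketch of that
probability (the constant 4 matches scikit-multiflow's implementation):

import numpy as np

def drift_probability(t, position=250000, width=50000):
    # P(sample t is drawn from drift_stream) in ConceptDriftStream
    return 1.0 / (1.0 + np.exp(-4.0 * (t - position) / width))

print(drift_probability(251000, width=1))      # ~1.0: abrupt switch
print(drift_probability(251000, width=50000))  # ~0.52: gradual transition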
Example 3
import os
from array import array

import numpy as np
from skmultiflow.data import FileStream
from skmultiflow.lazy import SAMKNN


def test_sam_knn_coverage(package_path):

    test_file = os.path.join(package_path,
                             'src/skmultiflow/data/datasets/sea_big.csv')

    stream = FileStream(test_file)
    stream.prepare_for_use()

    hyperParams = {
        'maxSize': 50,
        'n_neighbors': 3,
        'weighting': 'uniform',
        'stm_size_option': 'maxACC',
        'min_stm_size': 10,
        'useLTM': True
    }

    learner = SAMKNN(n_neighbors=hyperParams['n_neighbors'],
                     max_window_size=hyperParams['maxSize'],
                     weighting=hyperParams['weighting'],
                     stm_size_option=hyperParams['stm_size_option'],
                     min_stm_size=hyperParams['min_stm_size'],
                     use_ltm=hyperParams['useLTM'])

    cnt = 0
    max_samples = 1000
    predictions = array('i')

    wait_samples = 20

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [
        1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0,
        0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1,
        1
    ])
    assert np.all(predictions == expected_predictions)
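
Both tests pin the exact prediction sequence, which is brittle across library
versions. A sketch of the same test-then-train loop that tracks interleaved
accuracy instead, again with SEAGenerator standing in for the csv fixture:

from skmultiflow.data import SEAGenerator
from skmultiflow.lazy import SAMKNN

stream = SEAGenerator(random_state=1)
stream.prepare_for_use()
learner = SAMKNN(n_neighbors=3, max_window_size=50,
                 stm_size_option='maxACC', min_stm_size=10, use_ltm=True)

correct = n_preds = 0
for i in range(1000):
    X, y = stream.next_sample()
    if i >= 20:  # warm-up period, like wait_samples above
        correct += int(learner.predict(X)[0] == y[0])
        n_preds += 1
    learner.partial_fit(X, y)
print('interleaved accuracy:', correct / n_preds)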
Example 4
from skmultiflow.lazy import KNN, KNNAdwin, SAMKNN
from skmultiflow.meta import LeverageBagging
from skmultiflow.trees import HAT, HoeffdingTree

# GLVQ and CrossValidation come from the authors' own package, not from
# scikit-multiflow (import paths not shown in the source).


def evaluation():
    classifiers = [
        GLVQ(prototypes_per_class=4),
        HoeffdingTree(),
        HAT(),
        KNN(),
        SAMKNN(),
        LeverageBagging(),
        KNNAdwin(max_window_size=1000)
    ]  # list of classifiers to evaluate
    cv = CrossValidation(clfs=classifiers, max_samples=1000000, test_size=1)
    cv.streams = (cv.init_standard_streams() + cv.init_real_world()
                  + cv.init_reoccuring_streams())  # set up the scikit-multiflow stream generators
    cv.test()
    cv.save_summary()
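
init_standard_streams, init_real_world and init_reoccuring_streams belong to
the authors' CrossValidation helper, not to scikit-multiflow. A hypothetical
equivalent of such an initializer, built only from scikit-multiflow
generators, might look like this:

from skmultiflow.data import LEDGeneratorDrift, MIXEDGenerator, SEAGenerator

def init_standard_streams():
    # Hypothetical stand-in: return a list of ready-to-use synthetic streams.
    streams = [SEAGenerator(random_state=1),
               LEDGeneratorDrift(has_noise=False, noise_percentage=0.0,
                                 n_drift_features=3),
               MIXEDGenerator(classification_function=1, random_state=112)]
    for s in streams:
        s.prepare_for_use()
    return streams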
Example 5
from skmultiflow.lazy import KNN, SAMKNN
from skmultiflow.meta import AdaptiveRandomForest, OzaBaggingAdwin
from skmultiflow.trees import HAT

# RSLVQ and ARSLVQ are the authors' own classifiers, not part of
# scikit-multiflow (import paths not shown in the source).


def init_classifiers():
    n_prototypes_per_class = 4
    sigma = 4
    rslvq = RSLVQ(prototypes_per_class=n_prototypes_per_class, sigma=sigma)
    arslvq = ARSLVQ(prototypes_per_class=n_prototypes_per_class,
                    sigma=sigma,
                    confidence=0.0001,
                    window_size=300)

    oza = OzaBaggingAdwin(base_estimator=KNN())
    adf = AdaptiveRandomForest()
    samknn = SAMKNN()
    hat = HAT()

    clfs = [samknn]
    names = ["SamKnn"]
    # clfs = [rslvq]
    # names = ["rslvq"]
    return clfs, names
Example 6
import sys
import numpy as np
import requests
from skmultiflow.data import DataStream
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.lazy import SAMKNN
from skmultiflow.trees import HoeffdingAdaptiveTreeClassifier


def download_data():
    # Reconstructed (the source fragment starts mid-function); `url` and
    # `file_name` are assumed to be defined at module level.
    response = requests.get(url, stream=True)
    with open(file_name, 'wb') as f:
        total_length = response.headers.get('content-length')
        if total_length is None:
            f.write(response.content)
        else:
            dl = 0
            total_length = int(total_length)
            for data in response.iter_content(chunk_size=4096):
                dl += len(data)
                f.write(data)
                done = int(50 * dl / total_length)  # 50-char progress bar
                sys.stdout.write("\r[%s%s]" % ('=' * done, ' ' * (50 - done)))
                sys.stdout.flush()
    data = np.load(file_name, allow_pickle=True)

    return data


# data = download_data()
# If the dataset file is already downloaded:
data = np.load(file_name, allow_pickle=True)

sam = SAMKNN()
hat = HoeffdingAdaptiveTreeClassifier()  # adaptive Hoeffding tree (HAT)

stream = DataStream(data[:, 1:], data[:, 0].astype(int))
stream.prepare_for_use()

evaluator = EvaluatePrequential(max_samples=10000,
                                max_time=1000,
                                show_plot=True,
                                metrics=['accuracy', 'kappa'])

evaluator.evaluate(stream=stream,
                   model=[sam, hat],
                   model_names=['SAMKNN', 'HAT'])
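
DataStream takes the feature matrix and the label vector separately; the
slicing above assumes the first column of the downloaded array holds the
class label. A self-contained sketch of the same construction on synthetic
data:

import numpy as np
from skmultiflow.data import DataStream

rng = np.random.RandomState(0)
demo = rng.normal(size=(100, 4))
demo[:, 0] = rng.randint(0, 2, size=100)  # fake label column

stream = DataStream(demo[:, 1:], demo[:, 0].astype(int))
stream.prepare_for_use()
X, y = stream.next_sample()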
Example 7
from skmultiflow.lazy import KNN, SAMKNN
from skmultiflow.meta import AdaptiveRandomForest, OzaBaggingAdwin
from skmultiflow.trees import HAT

# RRSLVQ, RSLVQ and CrossValidation come from the authors' own package
# (import paths not shown in the source).


def test_grid():
    clfs = [
        RRSLVQ(prototypes_per_class=4, sigma=8),
        RSLVQ(prototypes_per_class=4, sigma=8),
        HAT(),
        OzaBaggingAdwin(base_estimator=KNN()),
        AdaptiveRandomForest(),
        SAMKNN()
    ]
    cv = CrossValidation(clfs=clfs, max_samples=1000000, test_size=1)
    cv.streams = cv.init_reoccuring_streams()
    cv.test()
    cv.save_summary()
    print("here")
Example 8
from skmultiflow.data import MIXEDGenerator
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.lazy import KNN, SAMKNN
from skmultiflow.meta import AdaptiveRandomForest, OzaBaggingAdwin
from skmultiflow.trees import HAT

# ARSLVQ and ReoccuringDriftStream come from the authors' own package
# (import paths not shown in the source).

stream.prepare_for_use()

evaluator = EvaluatePrequential(batch_size=10,
                                max_samples=10000,
                                show_plot=True,
                                metrics=['accuracy'])

evaluator.evaluate(stream=stream, model=cls, model_names=detectors)

arslvq = ARSLVQ(prototypes_per_class=n_prototypes_per_class,
                drift_detector="KS",
                confidence=0.05,
                sigma=sigma)
oza = OzaBaggingAdwin(base_estimator=KNN())
adf = AdaptiveRandomForest()
samknn = SAMKNN()
hat = HAT()
cls = [arslvq, oza, adf, samknn, hat]
detectors = ["ARSLVQ", "OzaAdwin", "ADF", "SamKNN", "HAT"]

s1 = MIXEDGenerator(classification_function=1,
                    random_state=112,
                    balance_classes=False)
s2 = MIXEDGenerator(classification_function=0,
                    random_state=112,
                    balance_classes=False)

stream = ReoccuringDriftStream(stream=s1,
                               drift_stream=s2,
                               random_state=None,
                               alpha=90.0,