def run(X, y, hyperParams):
    """ run

    Test function for SAMKNN, not integrated with evaluation modules.

    Runs a test-then-train loop over the samples one at a time: each sample
    is first predicted, then used to update the model via ``partial_fit``.
    Progress and the final error rate are reported through ``logging.info``.
    Nothing is returned.

    Parameters
    ----------
    X: numpy.ndarray of shape (n_samples, n_features)
        The feature's matrix, coded as 64 bits.

    y: numpy.array of size n_samples
        The labels for all the samples in X coded as 8 bits.

    hyperParams: dict
        A dictionary containing the __init__ params for the SAMKNN.
        The keys read here are 'nNeighbours', 'maxSize', 'knnWeights',
        'STMSizeAdaption' and 'useLTM'.

    """
    r, _ = get_dimensions(X)
    classifier = SAMKNN(n_neighbors=hyperParams['nNeighbours'],
                        max_window_size=hyperParams['maxSize'],
                        weighting=hyperParams['knnWeights'],
                        stm_size_option=hyperParams['STMSizeAdaption'],
                        use_ltm=hyperParams['useLTM'])
    logging.info('applying model on dataset')
    predicted_labels = []
    true_labels = []
    # Log progress about every 5% of the data. The step is clamped to 1 so
    # that inputs with fewer than 20 samples do not trigger a modulo by zero.
    progress_step = max(r // 20, 1)
    for i in range(r):
        sample = np.asarray([X[i]])
        # Predict before training on the sample (prequential evaluation).
        pred = classifier.predict(sample)
        predicted_labels.append(pred[0])
        true_labels.append(y[i])
        # partial_fit's return value is kept as the classifier, as before.
        classifier = classifier.partial_fit(sample, np.asarray([y[i]]), None)
        if i % progress_step == 0:
            logging.info(str((i // (r / 20))*5) + "%")
    accuracy = accuracy_score(true_labels, predicted_labels)
    logging.info('error rate %.2f%%' % (100-100*accuracy))
def demo():
    """Run three parameterized demos, each on its own fresh set of models.

    Every demo receives a separate list holding a HoeffdingTree, a SAMKNN,
    a LeverageBagging (random_state=1) and an SGDClassifier, so that no
    model instance is shared between runs. All three lists are built before
    the first demo starts.
    """

    def _fresh_models():
        # One new instance of each of the four compared classifiers.
        return [
            HoeffdingTree(),
            SAMKNN(),
            LeverageBagging(random_state=1),
            SGDClassifier(),
        ]

    first_models, second_models, third_models = (
        _fresh_models() for _ in range(3)
    )
    labels = ['HT', 'SAMKNN', 'LBkNN', 'SGDC']

    # Demo 1 -- plot should not fail
    demo_parameterized(first_models, model_names=labels)

    # Demo 2 -- csv output should look nice
    demo_parameterized(second_models, "sea_stream.csv", False, labels)

    # Demo 3 -- should not give "'NoneType' object is not iterable" error
    demo_parameterized(third_models, "covtype.csv", False, labels)
def test_grid():
    """Run CrossValidation.test() for three classifiers and save the summary.

    The stream list is the concatenation of the real-world streams, the
    standard streams and the reoccurring standard streams, in that order.
    """
    validation = CrossValidation(
        clfs=[AdaptiveRandomForest(), SAMKNN(), HAT()],
        max_samples=1000000,
        test_size=1,
    )

    # Build the stream list step by step, in the same order as a chained sum.
    all_streams = validation.init_real_world()
    all_streams = all_streams + validation.init_standard_streams()
    all_streams = all_streams + validation.init_reoccuring_standard_streams()
    validation.streams = all_streams

    validation.test()
    validation.save_summary()
# Example #4
0
def demo(output_file=None, instances=50000):
    """ demo

    Prequential evaluation of a SAMKNN classifier on a file stream.

    A FileStream is opened on the movingSquares.csv dataset and prepared
    for use. A SAMKNN classifier is then configured, and an
    EvaluatePrequential evaluator is run on the stream with that
    classifier, with plotting enabled.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Stream: samples are read from the moving squares csv file.
    stream = FileStream("../data/datasets/movingSquares.csv", -1, 1)
    stream.prepare_for_use()

    # Classifier: SAMKNN with the long-term memory switched off.
    sam_knn_options = {
        'n_neighbors': 5,
        'weighting': 'distance',
        'max_window_size': 1000,
        'stm_size_option': 'maxACCApprox',
        'use_ltm': False,
    }
    classifier = SAMKNN(**sam_knn_options)

    # Evaluator: no pretraining, single-sample batches, plotted performance.
    evaluator_options = {
        'pretrain_size': 0,
        'max_samples': instances,
        'batch_size': 1,
        'n_wait': 100,
        'max_time': 1000,
        'output_file': output_file,
        'show_plot': True,
        'metrics': ['performance'],
    }
    evaluator = EvaluatePrequential(**evaluator_options)

    # Run the evaluation.
    evaluator.evaluate(stream=stream, model=classifier)
def test_sam_knn():
    """Regression test for SAMKNN predictions on a seeded SEA stream.

    The learner is trained sample by sample on 5000 samples. Every 100th
    sample (excluding the first) is predicted before it is learned, and the
    collected predictions are compared with a fixed expected sequence.
    The test also checks that ``predict`` returns a numpy array and that
    ``predict_proba`` raises NotImplementedError.
    """
    stream = SEAGenerator(random_state=1)
    stream.prepare_for_use()

    hyperParams = {'maxSize': 1000, 'nNeighbours': 5, 'knnWeights': 'distance', 'STMSizeAdaption': 'maxACCApprox',
                   'use_ltm': False}

    learner = SAMKNN(n_neighbors=hyperParams['nNeighbours'], max_window_size=hyperParams['maxSize'],
                     weighting=hyperParams['knnWeights'],
                     stm_size_option=hyperParams['STMSizeAdaption'], use_ltm=hyperParams['use_ltm'])

    max_samples = 5000
    predictions = array('d')

    wait_samples = 100

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
                                       1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                       1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
                                       0, 0, 1, 1, 0, 0, 0, 0, 1, 1,
                                       1, 1, 0, 1, 0, 0, 1, 0, 1])

    # np.alltrue was removed in NumPy 2.0; np.all is the supported spelling.
    assert np.all(predictions == expected_predictions)

    # X still holds the last sample drawn in the loop above.
    assert isinstance(learner.predict(X), np.ndarray)

    with pytest.raises(NotImplementedError):
        learner.predict_proba(X)
def test_sam_knn_coverage():
    """Regression test for SAMKNN with 'maxACC' STM sizing and LTM enabled.

    The learner is trained sample by sample on 1000 samples of a seeded SEA
    stream. Every 20th sample (excluding the first) is predicted before it
    is learned, and the collected predictions are compared with a fixed
    expected sequence. The string returned by ``get_info`` is checked too.
    """
    stream = SEAGenerator(random_state=1)
    stream.prepare_for_use()

    hyperParams = {'maxSize': 50,
                   'n_neighbors': 3,
                   'weighting': 'uniform',
                   'stm_size_option': 'maxACC',
                   'min_stm_size': 10,
                   'use_ltm': True}

    learner = SAMKNN(n_neighbors=hyperParams['n_neighbors'],
                     max_window_size=hyperParams['maxSize'],
                     weighting=hyperParams['weighting'],
                     stm_size_option=hyperParams['stm_size_option'],
                     min_stm_size=hyperParams['min_stm_size'],
                     use_ltm=hyperParams['use_ltm'])

    max_samples = 1000
    predictions = array('i')

    wait_samples = 20

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                       0, 1, 0, 0, 1, 1, 1, 1, 1, 0,
                                       0, 1, 1, 1, 1, 1, 0, 1, 1, 1,
                                       1, 1, 1, 1, 0, 1, 1, 1, 1, 0,
                                       0, 0, 0, 0, 0, 1, 1, 1, 0])
    # np.alltrue was removed in NumPy 2.0; np.all is the supported spelling.
    assert np.all(predictions == expected_predictions)

    # NOTE(review): the expected text reports max_window_size=None although
    # 50 is passed above -- kept as-is; confirm against the library version.
    expected_info = "SAMKNN(ltm_size=0.4, max_window_size=None, min_stm_size=10, n_neighbors=3,\n" \
                    "       stm_size_option='maxACC', use_ltm=True, weighting='uniform')"
    assert learner.get_info() == expected_info
# Example #7
0
                                alpha=90.0, position=N_SAMPLES / 2)
    stream.name = 'LED ABRUPBT'
    STREAMS.append(stream)

    """Evaluate on ARSLVQ, SAM and HAT"""
    # TODO NB and ARSLVQ working
    for stream in STREAMS:
        print('{}:\n'.format(stream.name))
        f = open(res_file, 'a+')
        f.write('{}:\n'.format(stream.name))
        f.close()
        
        rrslvq = RRSLVQ(prototypes_per_class=2,confidence=1e-10)
        high_dim_test(copy.copy(stream), copy.copy(rrslvq), N_SAMPLES)
        low_dim_test(copy.copy(stream), copy.copy(rrslvq), N_SAMPLES)

        arslvq = RSLVQ(gradient_descent='Adadelta')
        high_dim_test(copy.copy(stream), copy.copy(arslvq), N_SAMPLES)
        low_dim_test(copy.copy(stream), copy.copy(arslvq), N_SAMPLES)

        samknn = SAMKNN(max_window_size=5000,stm_size_option=None)
        high_dim_test(copy.copy(stream), copy.copy(samknn), N_SAMPLES)
        low_dim_test(copy.copy(stream), copy.copy(samknn), N_SAMPLES)

        arf = ARF()
        high_dim_test(copy.copy(stream), copy.copy(arf), N_SAMPLES)
        low_dim_test(copy.copy(stream), copy.copy(arf), N_SAMPLES)



# while 1:
#    line = f.readline()
#    if line == '': break
#    arr = np.array(line.split(','), dtype='float64')
#    labels.append(arr[1])

# f.close()

# HIGH-DIM
# Split the loaded matrix into features (all columns but the last) and
# labels (last column). `data` is defined outside this excerpt.
X, y = data[:, :-1], data[:, -1]

# Classifiers to compare; each one is evaluated in turn below.
clfs = [
    RSLVQ(prototypes_per_class=2, gradient_descent="Adadelta"),
    RRSLVQ(prototypes_per_class=2, confidence=1e-10),
    ARF(),
    SAMKNN()
]

for clf in clfs:
    # Per-classifier accumulators, presumably one entry per repetition
    # (accuracy, kappa, runtime) -- the code filling them is not visible here.
    acc_fold = []
    kappa_fold = []
    time_fold = []

    # Five repetitions, each on a fresh deep copy of the classifier.
    for _ in range(5):
        _clf = copy.deepcopy(clf)
        start_time = time.time()
        y_true = []
        y_pred = []

        # First row of `data` as a single sample; the reshape fixes the
        # feature count at 1000.
        x = data[0, :-1].reshape(1, 1000)
        # NOTE(review): this rebinds the module-level `y` assigned above.
        y = data[0, -1].reshape(1, 1)
def test_led():
    """Run a CrossValidation search for six classifiers on four LED streams.

    Four drift streams are built, each switching from an LED generator with
    3 drift features to one with 7:

    * led_a  -- ConceptDriftStream, position 250000, width 1, alpha 90.0
    * led_g  -- ConceptDriftStream, position 250000, width 50000
    * led_fa -- ReoccuringDriftStream, position 2000, width 1, alpha 90.0
    * led_fg -- ReoccuringDriftStream, position 2000, width 1000

    The search is then run and its summary saved.
    """

    def _led(n_drift_features):
        # Noise-free LED generator with the requested number of drift features.
        return LEDGeneratorDrift(has_noise=False,
                                 noise_percentage=0.0,
                                 n_drift_features=n_drift_features)

    led_a = ConceptDriftStream(stream=_led(3),
                               drift_stream=_led(7),
                               random_state=None,
                               alpha=90.0,  # angle of change grade 0 - 90
                               position=250000,
                               width=1)
    led_a.name = "led_a"

    led_g = ConceptDriftStream(stream=_led(3),
                               drift_stream=_led(7),
                               random_state=None,
                               position=250000,
                               width=50000)
    led_g.name = "led_g"

    led_fa = ReoccuringDriftStream(stream=_led(3),
                                   drift_stream=_led(7),
                                   random_state=None,
                                   alpha=90.0,  # angle of change grade 0 - 90
                                   position=2000,
                                   width=1)

    led_fg = ReoccuringDriftStream(stream=_led(3),
                                   drift_stream=_led(7),
                                   random_state=None,
                                   position=2000,
                                   width=1000)

    # Settings shared by the two LVQ-based classifiers.
    n_prototypes = 2
    sigma = 3
    clfs = [
        ARSLVQ(prototypes_per_class=n_prototypes,
               sigma=sigma,
               confidence=0.0001,
               window_size=1500),
        OzaBaggingAdwin(),
        AdaptiveRandomForest(),
        HAT(),
        RSLVQ(prototypes_per_class=n_prototypes, sigma=sigma),
        SAMKNN(),
    ]

    cv = CrossValidation(clfs=clfs, parallel=1)
    cv.streams = [led_a, led_g, led_fa, led_fg]
    cv.search()
    cv.save_summary()