def demo():
    """Time and score a KNN classifier on a SEA file stream.

    Instances are read from ``../data/datasets/sea_big.csv`` through a
    ``FileStream``. A ``KNN`` model is pre-trained on the first 200
    instances and then asked to classify 5000 further instances, one at a
    time, without any additional training.

    The elapsed time (model creation, pre-training and classification),
    the number of evaluated samples and the fraction of correct
    predictions are printed to stdout.
    """
    stream = FileStream('../data/datasets/sea_big.csv', -1, 1)
    stream.prepare_for_use()

    pretrain_size = 200
    max_samples = 5000

    X, y = stream.next_sample(pretrain_size)

    # The clock covers model construction, pre-training and every prediction.
    start = timer()
    knn = KNN(n_neighbors=8, max_window_size=2000, leaf_size=40)
    knn.partial_fit(X, y)

    hits = 0
    for _ in range(max_samples):
        X, y = stream.next_sample()
        prediction = knn.predict(X)
        if y[0] == prediction[0]:
            hits += 1
    n_samples = max_samples

    end = timer()

    print(f'Evaluation time: {end - start}')
    print(f'{n_samples} samples analyzed.')
    print(f'My performance: {hits / n_samples}')
Exemple #2
0
    def __init__(self,
                 base_estimator=KNN(),
                 n_estimators=10,
                 w=6,
                 delta=0.002,
                 enable_code_matrix=False,
                 leverage_algorithm='leveraging_bag',
                 random_state=None):
        """Store the hyper-parameters and run the initial configuration.

        Every argument is kept verbatim on the instance under the same
        name; the internal state attributes are reset to ``None`` first and
        ``__configure()`` is invoked last.

        Parameters
        ----------
        base_estimator
            Estimator stored as ``self.base_estimator``.
            NOTE(review): the default ``KNN()`` is evaluated once, when the
            function is defined, so every instance built without an explicit
            ``base_estimator`` receives the same object. Confirm that
            ``__configure`` copies it before use.
        n_estimators
            Stored as ``self.n_estimators``; presumably the ensemble size,
            confirm against ``__configure``.
        w
            Stored as ``self.w``; its meaning is defined by code outside
            this method.
        delta
            Stored as ``self.delta``; its meaning is defined by code outside
            this method.
        enable_code_matrix
            Stored as ``self.enable_code_matrix``.
        leverage_algorithm
            Must be a member of ``self.LEVERAGE_ALGORITHMS``.
        random_state
            Stored as ``self.random_state``; the object actually used
            internally lives in ``self._random_state``.

        Raises
        ------
        ValueError
            If ``leverage_algorithm`` is not in ``self.LEVERAGE_ALGORITHMS``.
        """

        super().__init__()
        # Internal state: reset to None here, before __configure() runs below
        self.ensemble = None
        self.adwin_ensemble = None
        self.n_detected_changes = None
        self.matrix_codes = None
        self.classes = None
        self.init_matrix_codes = None
        self._random_state = None   # This is the actual random_state object used internally
        # Hyper-parameters, stored exactly as received
        self.base_estimator = base_estimator
        self.n_estimators = n_estimators
        self.enable_code_matrix = enable_code_matrix
        self.w = w
        self.delta = delta
        # Reject unknown algorithms before the value is stored on the instance
        if leverage_algorithm not in self.LEVERAGE_ALGORITHMS:
            raise ValueError("Leverage algorithm not supported.")
        self.leverage_algorithm = leverage_algorithm
        self.random_state = random_state
        # Runs last, once all hyper-parameters above are set
        self.__configure()
def demo():
    """Run the LeverageBagging classifier on a SEA generator stream.

    A ``LeverageBagging`` ensemble with a single ``KNN`` base estimator is
    pre-trained on 200 instances and then evaluated for 2000 iterations.
    Each iteration draws two instances, predicts them, trains on them, and
    scores only the first instance of the pair.

    Progress is logged every 5% of the iterations; the number of
    iterations, the accuracy and the classifier description are printed at
    the end.
    """
    logging.basicConfig(format='%(message)s', level=logging.INFO)
    warnings.filterwarnings("ignore", ".*Passing 1d.*")

    stream = SEAGenerator(1, noise_percentage=0.067, random_state=1)
    stream.prepare_for_use()

    base_knn = KNN(n_neighbors=8, max_window_size=2000, leaf_size=30)
    clf = LeverageBagging(base_estimator=base_knn,
                          n_estimators=1,
                          random_state=1)

    max_samples = 2000
    train_size = 200
    # Number of iterations between two progress messages (a float: true division)
    progress_step = max_samples / 20

    # The class labels must accompany the very first partial_fit call only
    classes_pending = True
    if train_size > 0:
        X, y = stream.next_sample(train_size)
        clf.partial_fit(X, y, classes=stream.target_values)
        classes_pending = False

    logging.info('%s%%', 0.0)

    correctly_classified = 0
    for step in range(max_samples):
        if (step + 1) % progress_step == 0:
            logging.info('%s%%', str((step // progress_step + 1) * 5))

        X, y = stream.next_sample(2)
        # Predict before training so the model is scored on unseen data
        my_pred = clf.predict(X)

        if classes_pending:
            clf.partial_fit(X, y, classes=stream.target_values)
            classes_pending = False
        else:
            clf.partial_fit(X, y)

        if my_pred is not None and y[0] == my_pred[0]:
            correctly_classified += 1

    sample_count = max_samples

    print(str(sample_count) + ' samples analyzed.')
    print('My performance: ' + str(correctly_classified / sample_count))
    print(clf.get_info())
Exemple #4
0
def test_oza_bagging_adwin():
    """Regression test for ``OzaBaggingAdwin`` with a ``KNN`` base estimator.

    Streams 5000 instances from a seeded ``SEAGenerator``. Every 100th
    instance (except the very first) is predicted before being learned, and
    the collected predictions, the accuracy, the return types of
    ``predict`` / ``predict_proba`` and the ``get_info()`` text are compared
    against recorded reference values.
    """
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    stream.prepare_for_use()
    knn = KNN(n_neighbors=8, leaf_size=40, max_window_size=2000)
    learner = OzaBaggingAdwin(base_estimator=knn,
                              n_estimators=3,
                              random_state=112)

    max_samples = 5000
    wait_samples = 100
    predictions = []
    correct_predictions = 0

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every `wait_samples` samples, skipping sample 0 (nothing learned yet)
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        # The class labels are only supplied with the first training call
        if cnt == 0:
            learner.partial_fit(X, y, classes=stream.target_values)
        else:
            learner.partial_fit(X, y)

    performance = correct_predictions / len(predictions)
    expected_predictions = [
        1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0,
        1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        1
    ]
    # np.alltrue was removed in NumPy 2.0; array_equal also checks the length
    assert np.array_equal(predictions, expected_predictions)

    expected_performance = 0.8979591836734694
    assert np.isclose(expected_performance, performance)

    expected_correct_predictions = 44
    assert correct_predictions == expected_correct_predictions

    # X still holds the last instance drawn in the loop above
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)

    expected_info = "OzaBaggingAdwin(base_estimator=KNN(leaf_size=40, max_window_size=2000,\n" \
                    "                                   n_neighbors=8, nominal_attributes=None),\n" \
                    "                n_estimators=3, random_state=112)"
    assert learner.get_info() == expected_info
def init_classifiers():
    """Build the classifiers to evaluate together with their display names.

    Only a default ``SAMKNN`` instance is returned. To evaluate further
    models, append the estimator to ``clfs`` and its label to ``names`` at
    the same position.

    Returns
    -------
    tuple of (list, list)
        ``clfs``, the classifier instances, and ``names``, one label per
        classifier, index-aligned with ``clfs``.
    """
    # Only the estimators that are actually returned are constructed.
    clfs = [SAMKNN()]
    names = ["SamKnn"]
    return clfs, names
Exemple #6
0
def test_leverage_bagging():
    """Regression test for ``LeverageBagging`` with a ``KNN`` base estimator.

    Streams 5000 instances from a seeded ``SEAGenerator``. Every 100th
    instance (except the very first) is predicted before being learned, and
    the collected predictions, the accuracy and the return types of
    ``predict`` / ``predict_proba`` are compared against recorded reference
    values.
    """
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    stream.prepare_for_use()
    knn = KNN(n_neighbors=8, leaf_size=40, max_window_size=2000)
    learner = LeverageBagging(base_estimator=knn,
                              n_estimators=3,
                              random_state=112)

    max_samples = 5000
    wait_samples = 100
    predictions = []
    correct_predictions = 0

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every `wait_samples` samples, skipping sample 0 (nothing learned yet)
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        # The class labels are only supplied with the first training call
        if cnt == 0:
            learner.partial_fit(X, y, classes=stream.target_values)
        else:
            learner.partial_fit(X, y)

    performance = correct_predictions / len(predictions)
    expected_predictions = [
        1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0,
        1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        1
    ]
    expected_correct_predictions = 42
    expected_performance = 0.8571428571428571

    # np.alltrue was removed in NumPy 2.0; array_equal also checks the length
    assert np.array_equal(predictions, expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    # X still holds the last instance drawn in the loop above
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)
Exemple #7
0
if __name__ == "__main__":
    # Script entry point: prequential evaluation of OzaBaggingAdwin(KNN) on a
    # stream that switches between two MIXED generator concepts.

    # Two MIXED generators that differ only in their classification function.
    s1, s2 = (
        MIXEDGenerator(classification_function=function_id,
                       random_state=112,
                       balance_classes=False)
        for function_id in (1, 0)
    )

    # 1. Create stream
    stream = ReoccuringDriftStream(stream=s1,
                                   drift_stream=s2,
                                   random_state=None,
                                   alpha=90.0,  # angle of change, in the range 0 - 90
                                   position=2000,
                                   width=500)
    stream.prepare_for_use()

    # 2. Create the model
    oza = OzaBaggingAdwin(base_estimator=KNN())

    # 3. Setup evaluator
    reported_metrics = ['accuracy', 'kappa_t', 'kappa_m', 'kappa']
    evaluator = EvaluatePrequential(show_plot=True,
                                    batch_size=10,
                                    max_samples=5000,
                                    metrics=reported_metrics,
                                    output_file=None)

    # 4. Run evaluator
    evaluator.evaluate(stream=stream, model=oza)