Example #1
def test_oracle_better():
    gen = SEAGenerator(random_state=42)
    gen.prepare_for_use()
    evaluator_dyn2sel = dyn2selPrequential(
        n_wait=100, max_samples=1000, pretrain_size=0
    )
    dynse_rank = DYNSEMethod(NaiveBayes(), 100, Rank())
    f = StringIO()
    with redirect_stdout(f):
        evaluator_dyn2sel.evaluate(gen, dynse_rank)
    out = f.getvalue()
    f.close()
    acc_rank = out[out.find("Accuracy") :]
    acc_rank = acc_rank[acc_rank.find(":") + 2 :]
    acc_rank = acc_rank[: acc_rank.find("\n")]
    acc_rank = float(acc_rank)

    evaluator_dyn2sel = dyn2selPrequential(
        n_wait=100, max_samples=1000, pretrain_size=0
    )
    dynse_oracle = DYNSEMethod(NaiveBayes(), 100, Oracle())
    f = StringIO()
    with redirect_stdout(f):
        evaluator_dyn2sel.evaluate(gen, dynse_oracle)
    out = f.getvalue()
    f.close()
    acc_oracle = out[out.find("Accuracy") :]
    acc_oracle = acc_oracle[acc_oracle.find(":") + 2 :]
    acc_oracle = acc_oracle[: acc_oracle.find("\n")]
    acc_oracle = float(acc_oracle)

    assert acc_oracle > acc_rank
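The two stdout-parsing blocks above are identical. They can be factored into a small helper; a minimal sketch (extract_accuracy is an illustrative name, and it assumes the same printed "Accuracy: <value>" summary line that the slicing above relies on):

def extract_accuracy(captured_output):
    # isolate the first "Accuracy" line of the evaluator's printed summary
    # and parse the number that follows the colon
    tail = captured_output[captured_output.find("Accuracy"):]
    tail = tail[tail.find(":") + 2:]
    return float(tail[:tail.find("\n")])

With it, each block above reduces to acc = extract_accuracy(f.getvalue()).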
Example #2
class Bayes(IncrementalClassifier):
    def __init__(self):
        super().__init__()
        self.clf = NaiveBayes()

    def partial_fit(self, one_row):
        self.clf.partial_fit([one_row[0]], [one_row[1]])

    def predict(self, x):
        return self.clf.predict(x)
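A short driving sketch for the wrapper above, assuming a scikit-multiflow generator supplies the rows (the loop is illustrative and not part of the original snippet; scikit-multiflow versions before 0.5 also need gen.prepare_for_use()):

from skmultiflow.data import SEAGenerator

gen = SEAGenerator(random_state=42)
clf = Bayes()
X, y = gen.next_sample(200)
for row in zip(X, y):           # each row is a (feature_vector, label) pair
    clf.partial_fit(row)
print(clf.predict(X[:10]))      # prediction delegates to the wrapped NaiveBayes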
Example #3
def test_equality_multiflow():
    gen = SEAGenerator(random_state=42)
    gen.prepare_for_use()
    evaluator_mtflow = mtflowPrequential(
        max_samples=1000, pretrain_size=0, restart_stream=True
    )
    evaluator_dyn2sel = dyn2selPrequential(max_samples=1000, pretrain_size=0)
    nb_mtflow = evaluator_mtflow.evaluate(gen, NaiveBayes())[0].__dict__
    nb_dyn2sel = evaluator_dyn2sel.evaluate(gen, NaiveBayes())[0].__dict__
    del nb_mtflow["_attribute_observers"]
    del nb_dyn2sel["_attribute_observers"]
    assert nb_mtflow == nb_dyn2sel
Example #4
def test_pretrain_size():
    gen = SEAGenerator(random_state=42)
    gen.prepare_for_use()
    evaluator_dyn2sel = dyn2selPrequential(
        n_wait=100, max_samples=1000, pretrain_size=150
    )
    dynse_rank = DYNSEMethod(NaiveBayes(), 100, Rank())
    evaluator_dyn2sel.evaluate(gen, dynse_rank)

    evaluator_dyn2sel = dyn2selPrequential(
        n_wait=100, max_samples=1000, pretrain_size=150
    )
    dynse_oracle = DYNSEMethod(NaiveBayes(), 100, Oracle())
    evaluator_dyn2sel.evaluate(gen, dynse_oracle)
Example #5
    def __init__(self,
                 n_estimators=5,
                 base_estimator=NaiveBayes(),
                 beta=0.8,
                 gamma=0.1,
                 pruning='weakest'):
        """
        Creates a new instance of AdditiveExpertEnsembleClassifier.
        """
        super().__init__()

        self.n_estimators = n_estimators
        self.base_estimator = base_estimator

        self.beta = beta
        self.gamma = gamma
        self.pruning = pruning
        assert self.pruning in ('weakest', 'oldest'), \
            'Unknown pruning strategy: {}'.format(self.pruning)

        # Following attributes are set later
        self.epochs = None
        self.num_classes = None
        self.experts = None

        self.reset()
Example #6
def test_evaluate_delayed_coverage(tmpdir):
    from skmultiflow.data import SEAGenerator
    from skmultiflow.bayes import NaiveBayes

    max_samples = 1000

    # Stream
    data = SEAGenerator(random_state=1)
    # Get X and y
    X, y = data.next_sample(max_samples)
    time = generate_random_dates(seed=1, samples=max_samples)

    # Setup temporal stream
    stream = TemporalDataStream(X, y, time, ordered=False)

    # Learner
    nb = NaiveBayes()

    output_file = os.path.join(str(tmpdir), "prequential_delayed_summary.csv")
    metrics = ['running_time', 'model_size']
    evaluator = EvaluatePrequentialDelayed(max_samples=max_samples,
                                           metrics=metrics,
                                           data_points_for_classification=True,
                                           output_file=output_file)

    evaluator.evaluate(stream=stream, model=nb, model_names=['NB'])
Example #7
    def __init__(self,
                 n_estimators=10,
                 n_kept_estimators=30,
                 base_estimator=NaiveBayes(),
                 window_size=200,
                 n_splits=5):
        """ Create a new ensemble"""

        super().__init__()

        # top K classifiers
        self.n_estimators = n_estimators

        # total number of classifiers to keep
        self.n_kept_estimators = n_kept_estimators

        # base learner
        self.base_estimator = base_estimator

        # the ensemble in which the classifiers are sorted by their weight
        self.models_pool = []

        # cross validation fold
        self.n_splits = n_splits

        # chunk-related information
        self.window_size = window_size  # chunk size
        self.p = -1  # chunk pointer
        self.X_chunk = None
        self.y_chunk = None
Example #8
def AdditiveExpertEnsemble(n_estimators=5, base_estimator=NaiveBayes(), beta=0.8, gamma=0.1,
                           pruning='weakest'):     # pragma: no cover
    warnings.warn("’AdditiveExpertEnsemble’ has been renamed to ‘AdditiveExpertEnsembleClassifier’ in v0.5.0.\n"
                  "The old name will be removed in v0.7.0", category=FutureWarning)
    return AdditiveExpertEnsembleClassifier(n_estimators=n_estimators,
                                            base_estimator=base_estimator,
                                            beta=beta,
                                            gamma=gamma,
                                            pruning=pruning)
Example #9
def DynamicWeightedMajority(n_estimators=5, base_estimator=NaiveBayes(), period=50, beta=0.5,
                            theta=0.01):     # pragma: no cover
    warnings.warn("'DynamicWeightedMajority' has been renamed to 'DynamicWeightedMajorityClassifier' in v0.5.0.\n"
                  "The old name will be removed in v0.7.0", category=FutureWarning)
    return DynamicWeightedMajorityClassifier(n_estimators=n_estimators,
                                             base_estimator=base_estimator,
                                             period=period,
                                             beta=beta,
                                             theta=theta)
Example #10
def AccuracyWeightedEnsemble(n_estimators=10, n_kept_estimators=30, base_estimator=NaiveBayes(), window_size=200,
                             n_splits=5):     # pragma: no cover
    warnings.warn("’AccuracyWeightedEnsemble’ has been renamed to ‘AccuracyWeightedEnsembleClassifier’ in v0.5.0.\n"
                  "The old name will be removed in v0.7.0", category=FutureWarning)
    return AccuracyWeightedEnsembleClassifier(n_estimators=n_estimators,
                                              n_kept_estimators=n_kept_estimators,
                                              base_estimator=base_estimator,
                                              window_size=window_size,
                                              n_splits=n_splits)
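Examples #8 to #10 repeat the same rename-shim pattern. It can be written once as a factory; a sketch under hypothetical naming (deprecated_alias is not a scikit-multiflow API):

import warnings

def deprecated_alias(new_cls, old_name):
    """Build a callable that warns and then forwards to the renamed class."""
    def alias(*args, **kwargs):
        warnings.warn("'{}' has been renamed to '{}' in v0.5.0.\n"
                      "The old name will be removed in v0.7.0"
                      .format(old_name, new_cls.__name__),
                      category=FutureWarning)
        return new_cls(*args, **kwargs)
    return alias

Unlike the explicit wrappers above, the factory does not preserve the default-argument signature, which is presumably why the library spells out each alias.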
Example #11
    def __init__(self,
                 n_estimators=5,
                 base_estimator=NaiveBayes(),
                 period=50,
                 beta=0.5,
                 theta=0.01):
        """
        Creates a new instance of DynamicWeightedMajority.
        """
        super().__init__(n_estimators, base_estimator, period, beta, theta)
Example #12
def evaluation_Naive_Bayes():
    # classification algorithms to be tested
    classifiers = [
        NaiveBayes()
    ]
    cv = CrossValidation(clfs=classifiers, max_samples=1000000, test_size=1)
    # initialize the stream generators from the scikit-multiflow package
    cv.streams = (init_standard_streams_naive_bayes()
                  + init_real_world_naive_bayes()
                  + cv.init_reoccuring_streams())
    cv.test()
    cv.save_summary()
Example #13
    def add_element(self, X, y):

        if self.in_concept_change:
            self.reset()

        X, y = np.asarray(X), np.asarray(y)

        # if X.ndim != 1 or y.ndim != 1:
        #     raise ValueError("input_value should have one dimension")

        if (not self.trained) and len(self.d_train_X) < self.n:
            self.d_train_X.append(X)
            self.d_train_y.append(y)
            if len(self.d_train_X) == self.n:
                self.l.partial_fit(np.asarray(self.d_train_X), np.asarray(self.d_train_y))
                self.trained = True
            return

        if len(self.d_train_X) < self.w:
            self.d_train_X.append(X)
            self.d_train_y.append(y)
            return

        self.d_buffer_X.append(X)
        self.d_buffer_y.append(y)

        if len(self.d_buffer_X) < self.w:
            return

        self.d_train_X, self.d_train_y = self.ldd_dis(np.asarray(self.d_train_X),
                                                      np.asarray(self.d_train_y),
                                                      np.asarray(self.d_buffer_X),
                                                      np.asarray(self.d_buffer_y))
        self.l = NaiveBayes()
        self.l.fit(self.d_train_X, self.d_train_y)

        self.d_train_X = self.d_train_X.tolist()
        self.d_train_y = self.d_train_y.tolist()
        print(len(self.d_train_X))
        self.d_buffer_X = []
        self.d_buffer_y = []

        return
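A minimal driving-loop sketch for the detector method above, following scikit-multiflow's add_element/in_concept_change protocol (illustrative only; the owning class is not shown in this listing, so its construction is left as a placeholder):

from skmultiflow.data import SEAGenerator

gen = SEAGenerator(random_state=1)
detector = ...  # an instance of the detector class that owns add_element above

for i in range(5000):
    X, y = gen.next_sample()
    detector.add_element(X[0], y[0])   # one feature row, one label
    if detector.in_concept_change:
        print("concept drift flagged at sample", i)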
Example #14
    def __init__(self,
                 n_estimators=10,
                 n_kept_estimators=30,
                 base_estimator=NaiveBayes(),
                 window_size=200,
                 n_splits=5):
        """ Create a new ensemble"""

        super().__init__(n_estimators, n_kept_estimators, base_estimator,
                         window_size, n_splits)
Example #15
def test_ensemble_size():
    # since each ensemble member is created once the number of seen instances reaches the chunk
    # size, the ensemble size should be n_samples // chunk_size (here 1050 // 100 = 10)
    chunk_size = 100
    n_samples = 1050
    gen = SEAGenerator(balance_classes=True)
    # gen.prepare_for_use()
    mde = MDEMethod(NaiveBayes(), chunk_size, KNORAE(), alpha=0.0)
    X, y = gen.next_sample(n_samples)
    mde.partial_fit(X, y)
    assert len(mde.ensemble) == n_samples // chunk_size
Example #16
def test_ensemble_size():
    # since each ensemble member is created once the number of seen instances reaches the chunk
    # size, the ensemble size should be n_samples // chunk_size (here 1050 // 100 = 10)
    chunk_size = 100
    n_samples = 1050
    gen = SEAGenerator()
    # gen.prepare_for_use()
    dynse = DYNSEMethod(NaiveBayes(), chunk_size, ModifiedRank())
    X, y = gen.next_sample(n_samples)
    dynse.partial_fit(X, y)
    assert len(dynse.ensemble) == n_samples // chunk_size
Example #17
def test_clone():
    stream = SEAGenerator(random_state=1)

    learner = NaiveBayes()

    cnt = 0
    max_samples = 5000
    y_pred = array('i')
    X_batch = []
    y_batch = []
    y_proba = []
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y, classes=[0, 1])
        cnt += 1

    cloned = clone(learner)

    assert learner._observed_class_distribution != {} and cloned._observed_class_distribution == {}
Example #18
    def __init__(self,
                 stable_estimator=NaiveBayes(),
                 reactive_estimator=NaiveBayes(),
                 window_size=12,
                 threshold=0.2):
        super().__init__()
        # default values

        self.c = None
        self.stable_base_estimator = stable_estimator
        self.reactive_base_estimator = reactive_estimator
        self.stable_estimator = None
        self.reactive_estimator = None
        self.t = None
        self.classes = None
        self.w = window_size
        self.theta = math.floor(self.w * threshold)
        self.instances_X = None
        self.instances_y = None
        self.change_detected = None
        self.number_of_errors = None
        self.__configure()
Example #19
def test_accuracy():
    # an ensemble of Naive Bayes classifiers should reach at least 85% accuracy on 200 test instances from SEAGenerator
    chunk_size = 100
    n_samples_train = 1050
    n_samples_test = 200
    gen = SEAGenerator(noise_percentage=0.0)
    # gen.prepare_for_use()
    nb = NaiveBayes()
    mde = MDEMethod(nb, chunk_size, KNORAU())
    X_train, y_train = gen.next_sample(n_samples_train)
    X_test, y_test = gen.next_sample(n_samples_test)
    mde.partial_fit(X_train, y_train)
    assert mde.score(X_test, y_test) > 0.85
Example #20
def test_online_csb2():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    stream.prepare_for_use()
    nb = NaiveBayes()
    learner = OnlineCSB2Classifier(base_estimator=nb,
                                   n_estimators=3,
                                   cost_positive=1,
                                   cost_negative=0.9,
                                   random_state=112)
    first = True

    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=stream.target_values)
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1
    performance = correct_predictions / len(predictions)
    expected_predictions = [
        1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0,
        1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        1
    ]

    expected_correct_predictions = 43
    expected_performance = 0.8775510204081632

    assert np.alltrue(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray

    expected_info = "OnlineCSB2Classifier(base_estimator=NaiveBayes(nominal_attributes=None), cost_negative=0.9, " \
                    "cost_positive=1, drift_detection=True, n_estimators=3, random_state=112)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info
Example #21
def test_online_rus_1():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    stream.prepare_for_use()
    nb = NaiveBayes()
    learner = OnlineRUSBoost(base_estimator=nb,
                             n_estimators=3,
                             sampling_rate=5,
                             algorithm=1,
                             random_state=112)
    first = True

    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=stream.target_values)
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1
    performance = correct_predictions / len(predictions)
    expected_predictions = [
        1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1,
        1
    ]

    expected_correct_predictions = 33
    expected_performance = 0.673469387755102

    assert np.alltrue(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray

    expected_info = "OnlineRUSBoost(algorithm=1, base_estimator=NaiveBayes(nominal_attributes=None),\n" \
                    "               drift_detection=True, n_estimators=3, random_state=112,\n" \
                    "               sampling_rate=5)"
    assert learner.get_info() == expected_info
Example #22
def test_awe():
    # prepare the stream
    stream = HyperplaneGenerator(random_state=1)
    stream.prepare_for_use()

    # prepare the ensemble
    classifier = AccuracyWeightedEnsemble(n_estimators=5,
                                          n_kept_estimators=10,
                                          base_estimator=NaiveBayes(),
                                          window_size=200,
                                          n_splits=5)

    # test the classifier
    max_samples = 5000
    cnt = 0
    wait_samples = 100
    predictions = array('i')
    correct = 0
    while cnt < max_samples:
        X, y = stream.next_sample()
        pred = classifier.predict(X)
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(int(pred[0]))
        classifier.partial_fit(X, y)
        cnt += 1
        if pred[0] == y:
            correct += 1

    # assert model predictions
    expected_predictions = array('i', [
        0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0,
        1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1,
        1
    ])

    # assert model performance
    expected_accuracy = 0.875
    accuracy = correct / max_samples
    assert expected_accuracy == accuracy

    assert np.alltrue(predictions == expected_predictions)

    # assert model information
    expected_info = "AccuracyWeightedEnsemble: n_estimators: 5 - " \
                    "n_kept_estimators: 10 - " \
                    "base_estimator: NaiveBayes: nominal attributes: [] -  - " \
                    "window_size: 200 - " \
                    "n_splits: 5"
    assert classifier.get_info() == expected_info
Example #23
def test_additive_expert_ensemble_weakest():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    stream.prepare_for_use()

    learner = AdditiveExpertEnsemble(3,
                                     NaiveBayes(),
                                     beta=0.5,
                                     gamma=0.1,
                                     pruning='weakest')

    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0
    first = True

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=stream.target_values)
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1
    performance = correct_predictions / len(predictions)

    expected_predictions = [
        1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0,
        1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        1
    ]
    expected_correct_predictions = 45
    expected_performance = 0.9183673469387755

    assert np.alltrue(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray

    expected_info = "AdditiveExpertEnsemble(base_estimator=NaiveBayes(nominal_attributes=None),\n" \
                    "                       beta=0.5, gamma=0.1, n_estimators=3, pruning='weakest')"
    assert learner.get_info() == expected_info
Example #24
def test_online_smote_bagging():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    nb = NaiveBayes()
    learner = OnlineSMOTEBaggingClassifier(base_estimator=nb,
                                           n_estimators=3,
                                           sampling_rate=2,
                                           random_state=112)
    first = True

    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=[0, 1])
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1
    performance = correct_predictions / len(predictions)
    expected_predictions = [
        1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        1
    ]

    expected_correct_predictions = 42
    expected_performance = 0.8571428571428571

    assert np.alltrue(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray

    expected_info = "OnlineSMOTEBaggingClassifier(base_estimator=NaiveBayes(nominal_attributes=None), " \
                    "drift_detection=True, n_estimators=3, random_state=None, sampling_rate=2)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info
Example #25
def test_dynamic_weighted_majority():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)

    learner = DynamicWeightedMajorityClassifier(3,
                                                NaiveBayes(),
                                                beta=0.5,
                                                theta=0.01)

    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0
    first = True

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=stream.target_values)
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1
    performance = correct_predictions / len(predictions)
    expected_predictions = [
        1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0,
        1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        1
    ]
    expected_correct_predictions = 44
    expected_performance = 0.8979591836734694

    assert np.alltrue(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray

    expected_info = 'DynamicWeightedMajorityClassifier(base_estimator=NaiveBayes(nominal_attributes=None),\n' \
                    '                                  beta=0.5, n_estimators=3, period=50,\n' \
                    '                                  theta=0.01)'
    assert learner.get_info() == expected_info
Example #26
def test_online_rus_3():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    nb = NaiveBayes()
    learner = OnlineRUSBoostClassifier(base_estimator=nb,
                                       n_estimators=3,
                                       sampling_rate=5,
                                       algorithm=3,
                                       random_state=112)
    first = True

    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=[0, 1])
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1
    performance = correct_predictions / len(predictions)
    expected_predictions = [
        1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1,
        1
    ]

    expected_correct_predictions = 35
    expected_performance = 0.7142857142857143

    assert np.alltrue(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray
Example #27
def test_leverage_bagging_code_matrix():
    nb = NaiveBayes()

    # enable the output detection code matrix
    learner = LeveragingBaggingClassifier(base_estimator=nb,
                                          n_estimators=5,
                                          random_state=12,
                                          enable_code_matrix=True)

    y_expected = np.asarray([0, 0, 3, 2, 3, 1, 4, 1, 3, 4,
                             2, 4, 2, 2, 0, 0, 2, 4, 2, 4,
                             0, 4, 2, 4, 2, 4, 0, 4, 1, 3,
                             2, 1, 2, 4, 2, 4, 1, 3, 0, 4,
                             2, 0, 0, 4, 3, 2, 4, 4, 2, 4], dtype=int)

    run_prequential_supervised(RandomTreeGenerator(tree_random_state=1, sample_random_state=12, n_classes=5),
                               learner, max_samples=2000, n_wait=40, target_values=[0,1,2,3,4], y_expected=y_expected)
Example #28
def test_leverage_bagging_me():
    nb = NaiveBayes()

    # leveraging_bag_me
    learner = LeveragingBaggingClassifier(base_estimator=nb,
                                          n_estimators=5,
                                          random_state=112,
                                          leverage_algorithm='leveraging_bag_me')

    y_expected = np.asarray([0, 0, 0, 1, 0, 1, 0, 0, 1, 0,
                             0, 0, 0, 1, 0, 0, 1, 1, 0, 0,
                             1, 0, 0, 0, 1, 1, 0, 1, 0, 1,
                             0, 0, 0, 1, 1, 0, 1, 1, 1, 0,
                             1, 0, 1, 0, 0, 1, 1, 0, 1, 0], dtype=int)

    run_prequential_supervised(ConceptDriftStreamGenerator(position=500, width=100, random_state=112),
                               learner, max_samples=2000, n_wait=40, target_values=[0,1], y_expected=y_expected)
Example #29
def test_online_adac2():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    stream.prepare_for_use()
    nb = NaiveBayes()
    learner = OnlineAdaC2(base_estimator=nb,
                          n_estimators=3,
                          random_state=112,
                          cost_positive=1,
                          cost_negative=1)
    first = True

    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=stream.target_values)
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1
    performance = correct_predictions / len(predictions)
    expected_predictions = [
        1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0,
        1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        1
    ]
    expected_correct_predictions = 44
    expected_performance = 0.8979591836734694

    assert np.alltrue(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray
Example #30
    def __init__(self,
                 min_estimators=5,
                 max_estimators=20,
                 base_estimators=[NaiveBayes(),
                                  HoeffdingTreeClassifier()],
                 period=1000,
                 alpha=0.002,
                 beta=1.5,
                 theta=0.05,
                 enable_diversity=True):
        """
        Creates a new instance of DiversifiedDynamicClassWeightedClassifier.
        """
        super().__init__()

        self.enable_diversity = enable_diversity
        self.min_estimators = min_estimators
        self.max_estimators = max_estimators
        self.base_estimators = base_estimators

        self.alpha = alpha
        self.beta = beta
        self.theta = theta
        self.period = period

        self.p = -1

        self.n_estimators = max_estimators
        self.epochs = None
        self.num_classes = None
        self.experts = None
        self.div = []

        self.window_size = None
        self.X_batch = None
        self.y_batch = None
        self.y_batch_experts = None

        # custom measurement attributes
        self.custom_measurements = []
        self.custom_time = []

        self.reset()
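Note that base_estimators=[NaiveBayes(), HoeffdingTreeClassifier()] is a mutable default argument, so every instance built without that argument shares the same two estimator objects. A common alternative, sketched here as a suggestion rather than the library's actual code:

from skmultiflow.bayes import NaiveBayes
from skmultiflow.trees import HoeffdingTreeClassifier

class DDCWSketch:
    """Illustrative fragment only; not the library's implementation."""

    def __init__(self, base_estimators=None):
        # build fresh estimator instances per object instead of sharing a
        # single list created once at function-definition time
        if base_estimators is None:
            base_estimators = [NaiveBayes(), HoeffdingTreeClassifier()]
        self.base_estimators = base_estimators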