import os
from array import array

import numpy as np

from skmultiflow.bayes import NaiveBayes
from skmultiflow.data import SEAGenerator


def test_naive_bayes(test_path):
    stream = SEAGenerator(random_state=1)
    stream.prepare_for_use()

    learner = NaiveBayes()

    cnt = 0
    max_samples = 5000
    y_pred = array('i')
    X_batch = []
    y_batch = []
    y_proba = []
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y, classes=stream.target_values)
        cnt += 1

    expected_predictions = array('i', [
        1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1,
        1
    ])

    assert np.all(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'data_naive_bayes_proba.npy')
    y_proba_expected = np.load(test_file)
    assert np.allclose(y_proba, y_proba_expected)

    expected_info = 'NaiveBayes: nominal attributes: [] - '
    assert learner.get_info() == expected_info

    learner.reset()
    learner.fit(X=np.array(X_batch[:4500]), y=np.array(y_batch[:4500]))

    expected_score = 0.9378757515030061
    assert np.isclose(
        expected_score,
        learner.score(X=np.array(X_batch[4501:]), y=np.array(y_batch[4501:])))

    assert 'estimator' == learner.get_class_type()

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray
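
The test above exercises NaiveBayes in the usual interleaved test-then-train fashion. The condensed sketch below shows the same pattern as a standalone prequential loop; the helper name and sample count are illustrative and not part of the original test.

def prequential_accuracy_sketch(n_samples=1000):
    # Illustrative only: reuses the SEAGenerator / NaiveBayes APIs from the test above.
    stream = SEAGenerator(random_state=1)
    stream.prepare_for_use()
    learner = NaiveBayes()

    correct = 0
    for i in range(n_samples):
        X, y = stream.next_sample()
        if i > 0:  # predict only after the model has seen at least one sample
            correct += int(learner.predict(X)[0] == y[0])
        learner.partial_fit(X, y, classes=stream.target_values)
    return correct / (n_samples - 1)  # running prequential accuracy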
Example #2
import numpy as np
from scipy import stats
from sklearn.neighbors import KDTree

from skmultiflow.bayes import NaiveBayes

# BaseDistributionDetector is assumed to be provided by the surrounding project
# (it supplies reset() and the in_concept_change flag used below).


class LDDDSDA(BaseDistributionDetector):

    def __init__(self, batch_size=100, train_size=100, rho=0.1, alpha=0.05, base_learner=None):
        super().__init__()
        self.w = batch_size      # size of the evaluation batch / buffer
        self.l = base_learner if base_learner is not None else NaiveBayes()
        self.n = train_size      # number of samples used to train the initial model
        self.alpha = alpha       # significance level for the LDD thresholds
        self.rho = rho           # neighbourhood fraction used by the k-NN density estimate
        self.trained = False

        self.d_train_X, self.d_train_y = [], []
        self.d_buffer_X, self.d_buffer_y = [], []
        self.reset()

    def reset(self):
        super().reset()

    def add_element(self, X, y):
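        """Process one (X, y) pair.

        The first ``n`` samples train the base learner; the training window is
        then topped up to ``w`` samples, after which incoming samples are
        buffered. Once the buffer holds ``w`` samples it is compared with the
        training window via ``ldd_dis`` to look for a distribution change.
        """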

        if self.in_concept_change:
            self.reset()

        X, y = np.asarray(X), np.asarray(y)

        # if X.ndim != 1 or y.ndim != 1:
        #     raise ValueError("input_value should has one dimension")

        # Warm-up: collect the first n samples, then train the base learner once.
        if (not self.trained) and len(self.d_train_X) < self.n:
            self.d_train_X.append(X)
            self.d_train_y.append(y)
            if len(self.d_train_X) == self.n:
                self.l.partial_fit(np.asarray(self.d_train_X), np.asarray(self.d_train_y))
                self.trained = True
            return

        # Top the training window up to w samples before buffering starts.
        if len(self.d_train_X) < self.w:
            self.d_train_X.append(X)
            self.d_train_y.append(y)
            return

        self.d_buffer_X.append(X)
        self.d_buffer_y.append(y)

        if len(self.d_buffer_X) < self.w:
            return

        # A full buffer triggers the LDD comparison; the (possibly resampled)
        # result becomes the new training window and the learner is rebuilt.
        self.d_train_X, self.d_train_y = self.ldd_dis(np.asarray(self.d_train_X),
                                                      np.asarray(self.d_train_y),
                                                      np.asarray(self.d_buffer_X),
                                                      np.asarray(self.d_buffer_y))
        self.l = NaiveBayes()
        self.l.fit(self.d_train_X, self.d_train_y)

        self.d_train_X = self.d_train_X.tolist()
        self.d_train_y = self.d_train_y.tolist()
        self.d_buffer_X = []
        self.d_buffer_y = []

        return

    def predict(self, X):
        return self.l.predict(X)

    def ldd_dis(self, d1_X, d1_y, d2_X, d2_y):
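        """Compare the training window (d1) with the new buffer (d2).

        Estimates a local drift degree for every sample from k-NN counts,
        derives decrease/increase thresholds from a random split of the pooled
        data, and classifies samples as density-decreased, stable or increased.
        If no region changed, d1 is returned unchanged; otherwise the drift
        flag is set and a resampled training set of roughly ``w`` samples,
        drawn from both windows, is returned.
        """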
        d = np.append(d1_X, d2_X, axis=0)
        d_y = np.append(d1_y, d2_y, axis=0)
        d1_dec, d1_sta, d1_inc = [], [], []
        d2_dec, d2_sta, d2_inc = [], [], []

        kdtree = KDTree(d)
        d_knn = []
        for i in range(d.shape[0]):
            d_knn.append(set(kdtree.query(X=d[i:i+1],
                                          k=int(d.shape[0] * self.rho),
                                          return_distance=False)[0]))

        indexes = np.arange(d.shape[0])
        np.random.shuffle(indexes)
        _d1 = set(indexes[:d1_X.shape[0]])
        _d2 = set(indexes[d1_X.shape[0]:])
        deltas = []
        for i in range(d.shape[0]):
            x1 = len(d_knn[indexes[i]] & _d1)
            x2 = len(d_knn[indexes[i]] & _d2)
            if i < d1_X.shape[0]:
                deltas.append(x2 / x1 - 1)
            else:
                deltas.append(x1 / x2 - 1)

        delta_std = np.std(deltas, ddof=1)
        # The alpha-quantile (negative) flags significant density decreases,
        # the (1 - alpha)-quantile (positive) flags significant increases.
        theta_dec = stats.norm.ppf(self.alpha, 0, delta_std)
        theta_inc = stats.norm.ppf(1 - self.alpha, 0, delta_std)

        _d1 = set(np.arange(d1_X.shape[0]))
        _d2 = set(np.arange(d1_X.shape[0], d.shape[0]))
        for i in range(d.shape[0]):
            x1 = len(d_knn[i] & _d1)
            x2 = len(d_knn[i] & _d2)
            if i < d1_X.shape[0]:
                delta = x2 / x1 - 1
                if delta < theta_dec:
                    d1_dec.append(i)
                elif delta > theta_inc:
                    d1_inc.append(i)
                else:
                    d1_sta.append(i)
            else:
                delta = x1 / x2 - 1
                if delta < theta_dec:
                    d2_dec.append(i)
                elif delta > theta_inc:
                    d2_inc.append(i)
                else:
                    d2_sta.append(i)

        if len(d1_dec) == 0 and len(d2_inc) == 0:
            return d1_X, d1_y

        self.in_concept_change = True

        aux = []
        if len(d2_dec) != 0:
            aux.append(len(d1_inc) / len(d2_dec))
        if len(d2_sta) != 0:
            aux.append(len(d1_sta) / len(d2_sta))
        if len(d2_inc) != 0:
            aux.append(len(d1_dec) / len(d2_inc))
        k = min(aux)

        d2_dec += d1_inc[:int(k * len(d2_dec))]
        d2_sta += d1_sta[:int(k * len(d2_sta))]
        d2_inc += d1_dec[:int(k * len(d2_inc))]

        aux_indexes = d2_inc + d2_sta + d2_dec

        r = self.w / len(aux_indexes)

        d2_dec = d2_dec[:int(len(d2_dec)*r)]
        d2_sta = d2_sta[:int(len(d2_sta)*r)]
        d2_inc = d2_inc[:int(len(d2_inc)*r)]

        aux_indexes = d2_inc + d2_sta + d2_dec

        return d[aux_indexes], d_y[aux_indexes]
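
A minimal driver for the detector above might look as follows. This is a sketch only: it assumes BaseDistributionDetector exposes the in_concept_change flag that add_element reads, and it reuses the stream setup from the first example; the sample count is arbitrary.

stream = SEAGenerator(random_state=1)
stream.prepare_for_use()
detector = LDDDSDA(batch_size=100, train_size=100, rho=0.1, alpha=0.05)

for i in range(5000):
    X, y = stream.next_sample()
    detector.add_element(X[0], y[0])   # one flattened feature vector and its label
    if detector.in_concept_change:     # flag assumed to come from the base class
        print('Change detected around sample {}'.format(i))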