def test_naive_bayes(test_path):
    """Regression test for NaiveBayes.

    Runs a prequential (test-then-train) pass over 5000 SEA samples,
    checking predictions/probabilities every 100 samples against stored
    expectations, then verifies the info string and a batch fit/score.

    Fix: ``np.alltrue`` was deprecated in NumPy 1.25 and removed in 2.0;
    use ``np.all`` instead (identical semantics).
    """
    stream = SEAGenerator(random_state=1)
    stream.prepare_for_use()
    learner = NaiveBayes()

    cnt = 0
    max_samples = 5000
    y_pred = array('i')
    X_batch = []
    y_batch = []
    y_proba = []
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y, classes=stream.target_values)
        cnt += 1

    expected_predictions = array('i', [
        1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
        0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0,
        0, 1, 1, 1
    ])
    # np.alltrue is deprecated (removed in NumPy 2.0); np.all is equivalent.
    assert np.all(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'data_naive_bayes_proba.npy')
    y_proba_expected = np.load(test_file)
    assert np.allclose(y_proba, y_proba_expected)

    expected_info = 'NaiveBayes: nominal attributes: [] - '
    assert learner.get_info() == expected_info

    learner.reset()
    learner.fit(X=np.array(X_batch[:4500]), y=np.array(y_batch[:4500]))

    expected_score = 0.9378757515030061
    assert np.isclose(
        expected_score,
        learner.score(X=np.array(X_batch[4501:]), y=np.array(y_batch[4501:])))

    assert 'estimator' == learner.get_class_type()
    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray
def test_clone():
    """Cloning a trained NaiveBayes must return an *untrained* copy.

    Trains a learner prequentially on 5000 SEA samples, clones it, and
    asserts the clone's class-distribution counters are empty while the
    original's are not.
    """
    stream = SEAGenerator(random_state=1)
    learner = NaiveBayes()

    max_samples = 5000
    wait_samples = 100
    y_pred = array('i')
    X_batch, y_batch, y_proba = [], [], []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
        # Test every n samples
        if cnt != 0 and cnt % wait_samples == 0:
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y, classes=[0, 1])

    cloned = clone(learner)

    assert learner._observed_class_distribution != {} and cloned._observed_class_distribution == {}
class Bayes(IncrementalClassifier):
    """Thin incremental-classifier adapter around NaiveBayes."""

    def __init__(self):
        super().__init__()
        self.clf = NaiveBayes()

    def partial_fit(self, one_row):
        # one_row is a (features, label) pair; wrap each part in a
        # single-element batch as partial_fit expects sequences.
        features, label = one_row[0], one_row[1]
        self.clf.partial_fit([features], [label])

    def predict(self, x):
        return self.clf.predict(x)
class LDDDSDA(BaseDistributionDetector):
    """Local Drift Degree based drift detector with density resampling.

    Maintains a training window and a buffer window of equal size ``w``;
    once the buffer fills, ``ldd_dis`` compares the two windows' local
    densities (kNN-based) and, if drift is flagged, resamples a new
    training set and refits the base learner.

    Fixes vs. the original:
    - ``base_learner=NaiveBayes()`` was a mutable default evaluated once at
      definition time and therefore shared across instances; replaced with
      a ``None`` sentinel (backward-compatible).
    - In ``ldd_dis`` the final resampling sliced ``d1_inc`` where the
      symmetric pattern (and the two sibling lines) requires ``d2_inc``.
    - Removed a stray debug ``print`` and dead commented-out code.
    """

    def __init__(self, batch_size=100, train_size=100, rho=0.1, alpha=0.05,
                 base_learner=None):
        super().__init__()
        self.w = batch_size
        # Sentinel default: a fresh NaiveBayes per instance, never shared.
        self.l = base_learner if base_learner is not None else NaiveBayes()
        self.n = train_size
        self.alpha = alpha
        self.rho = rho
        self.trained = False
        self.d_train_X, self.d_train_y = [], []
        self.d_buffer_X, self.d_buffer_y = [], []
        self.reset()

    def reset(self):
        super().reset()

    def add_element(self, X, y):
        """Feed one (X, y) sample; may trigger drift detection/refit."""
        if self.in_concept_change:
            self.reset()

        X, y = np.asarray(X), np.asarray(y)

        # Phase 1: accumulate the initial training set of n samples,
        # then fit the base learner once.
        if (not self.trained) and len(self.d_train_X) < self.n:
            self.d_train_X.append(X)
            self.d_train_y.append(y)
            if len(self.d_train_X) == self.n:
                self.l.partial_fit(np.asarray(self.d_train_X),
                                   np.asarray(self.d_train_y))
                self.trained = True
            return

        # Phase 2: top the training window up to w samples.
        if len(self.d_train_X) < self.w:
            self.d_train_X.append(X)
            self.d_train_y.append(y)
            return

        # Phase 3: fill the buffer window; compare once it holds w samples.
        self.d_buffer_X.append(X)
        self.d_buffer_y.append(y)
        if len(self.d_buffer_X) < self.w:
            return

        self.d_train_X, self.d_train_y = self.ldd_dis(
            np.asarray(self.d_train_X), np.asarray(self.d_train_y),
            np.asarray(self.d_buffer_X), np.asarray(self.d_buffer_y))
        # Refit a fresh learner on the resampled training window.
        self.l = NaiveBayes()
        self.l.fit(self.d_train_X, self.d_train_y)
        self.d_train_X = self.d_train_X.tolist()
        self.d_train_y = self.d_train_y.tolist()
        self.d_buffer_X = []
        self.d_buffer_y = []
        return

    def predict(self, X):
        """Predict with the current base learner."""
        return self.l.predict(X)

    def ldd_dis(self, d1_X, d1_y, d2_X, d2_y):
        """Local-drift-degree comparison of windows d1 (old) and d2 (new).

        Returns the (possibly resampled) training data. Sets
        ``self.in_concept_change`` when drift is detected.
        """
        d = np.append(d1_X, d2_X, axis=0)
        d_y = np.append(d1_y, d2_y, axis=0)
        d1_dec, d1_sta, d1_inc = [], [], []
        d2_dec, d2_sta, d2_inc = [], [], []

        # kNN neighbourhood (rho fraction of the pooled data) per sample.
        kdtree = KDTree(d)
        n_neighbors = int(d.shape[0] * self.rho)
        d_knn = [set(kdtree.query(X=d[i:i + 1], k=n_neighbors,
                                  return_distance=False)[0])
                 for i in range(d.shape[0])]

        # Null distribution of the degree statistic under a random split.
        indexes = np.arange(d.shape[0])
        np.random.shuffle(indexes)
        _d1 = set(indexes[:d1_X.shape[0]])
        _d2 = set(indexes[d1_X.shape[0]:])
        deltas = []
        for i in range(d.shape[0]):
            x1 = len(d_knn[indexes[i]] & _d1)
            x2 = len(d_knn[indexes[i]] & _d2)
            if i < d1_X.shape[0]:
                deltas.append(x2 / x1 - 1)
            else:
                deltas.append(x1 / x2 - 1)
        delta_std = np.std(deltas, ddof=1)
        theta_dec = stats.norm.ppf(1 - self.alpha, 0, delta_std)
        theta_inc = stats.norm.ppf(self.alpha, 0, delta_std)

        # Classify each sample's local density change on the true split.
        _d1 = set(np.arange(d1_X.shape[0]))
        _d2 = set(np.arange(d1_X.shape[0], d.shape[0]))
        for i in range(d.shape[0]):
            x1 = len(d_knn[i] & _d1)
            x2 = len(d_knn[i] & _d2)
            if i < d1_X.shape[0]:
                delta = x2 / x1 - 1
                if delta < theta_dec:
                    d1_dec.append(i)
                elif delta > theta_inc:
                    d1_inc.append(i)
                else:
                    d1_sta.append(i)
            else:
                delta = x1 / x2 - 1
                if delta < theta_dec:
                    d2_dec.append(i)
                elif delta > theta_inc:
                    d2_inc.append(i)
                else:
                    d2_sta.append(i)

        # No decreased-density old samples and no increased-density new
        # samples -> no drift; keep the old training window.
        if len(d1_dec) == 0 and len(d2_inc) == 0:
            return d1_X, d1_y

        self.in_concept_change = True

        # Mixing ratio k: smallest per-category old/new size ratio.
        aux = []
        if len(d2_dec) != 0:
            aux.append(len(d1_inc) / len(d2_dec))
        if len(d2_sta) != 0:
            aux.append(len(d1_sta) / len(d2_sta))
        if len(d2_inc) != 0:
            aux.append(len(d1_dec) / len(d2_inc))
        k = min(aux)

        d2_dec += d1_inc[:int(k * len(d2_dec))]
        d2_sta += d1_sta[:int(k * len(d2_sta))]
        d2_inc += d1_dec[:int(k * len(d2_inc))]

        # Scale the merged pool back down to window size w.
        aux_indexes = d2_inc + d2_sta + d2_dec
        r = self.w / len(aux_indexes)
        d2_dec = d2_dec[:int(len(d2_dec) * r)]
        d2_sta = d2_sta[:int(len(d2_sta) * r)]
        # BUG FIX: original sliced d1_inc here instead of d2_inc,
        # breaking the symmetric resampling of the increased-density set.
        d2_inc = d2_inc[:int(len(d2_inc) * r)]
        aux_indexes = d2_inc + d2_sta + d2_dec
        return d[aux_indexes], d_y[aux_indexes]
def main():
    """Benchmark drift detectors (K-SWMEBWIN, SWMEBWIN, ADWIN, KSWIN, EDDM,
    DDM) over every stream in ``streams``, logging detections to
    ``drifts.txt`` and printing per-stream and overall confusion stats.

    Fixes vs. the original:
    - DDM overall false negatives accumulated ``tn`` instead of ``fn``.
    - Elapsed times written to drifts.txt recomputed ``time.time() - start``
      after printing; now the captured ``end`` is reported consistently.
    - File handles are managed with ``with`` blocks.
    """
    overall_kswin_tp = overall_kswin_tn = overall_kswin_fp = overall_kswin_fn = 0
    overall_adwin_tp = overall_adwin_tn = overall_adwin_fp = overall_adwin_fn = 0
    overall_k_swmebwin_tp = overall_k_swmebwin_tn = overall_k_swmebwin_fp = overall_k_swmebwin_fn = 0
    overall_swmebwin_tp = overall_swmebwin_tn = overall_swmebwin_fp = overall_swmebwin_fn = 0
    overall_eddm_tp = overall_eddm_tn = overall_eddm_fp = overall_eddm_fn = 0
    overall_ddm_tp = overall_ddm_tn = overall_ddm_fp = overall_ddm_fn = 0

    for stream in streams:
        print(stream.name)
        with open('drifts.txt', 'a+') as f:
            f.write(f'**{stream.name}**\n\n')

        stream.prepare_for_use()
        stream.next_sample()

        adwin = []
        kswin = []
        ddm = DDM(min_num_instances=30)
        eddm = EDDM()
        data = []
        labels = []
        predictions = []
        kswin_drifts = []
        adwin_drifts = []
        k_swmebwin_drifts = []
        swmebwin_drifts = []
        eddm_drifts = []
        ddm_drifts = []
        swmebwin = SWMEBWIN(classes=stream.target_values, w_size=80, epsilon=0.05)
        # gamma maybe 1.0 / stream.current_sample_x.shape[1]
        k_swmebwin = Kernel_SWMEBWIN(classes=stream.target_values, w_size=80, epsilon=0.05)

        RANGE = 1000000
        DIM = 50
        # - 2 because first drift is at 2000 not 1000 and last drift is not detectable
        n_rand_dims = DIM - stream.current_sample_x.size
        multiply = n_rand_dims // stream.current_sample_x.size

        # Pre-train the error-rate learner on one padded sample.
        # NOTE(review): the original (whitespace-mangled) source appeared to
        # reset current_sample_x inside this loop; rebuilt to match the
        # padding pattern of the main loop below — confirm against history.
        current_sample_x = np.array([[]])
        for _m in range(multiply):
            current_sample_x = np.concatenate(
                (current_sample_x, stream.current_sample_x), axis=1)
        bayes = NaiveBayes()
        bayes.partial_fit(np.array(current_sample_x),
                          list(stream.current_sample_y.ravel()))

        # One univariate detector per dimension for ADWIN and KSWIN.
        for j in range(DIM):
            adwin.append(ADWIN(delta=0.002))
            kswin.append(KSWIN(w_size=300, stat_size=30, alpha=0.0001))

        """Add dims"""
        for i in range(RANGE):
            current_sample_x = np.array([[]])
            for _m in range(multiply):
                current_sample_x = np.concatenate(
                    (current_sample_x, stream.current_sample_x), axis=1)
            data.append(current_sample_x.ravel())
            labels.append(stream.current_sample_y.ravel()[0])
            # 0 = correct prediction, 1 = error (input for DDM/EDDM).
            predictions.append(0 if bayes.predict(current_sample_x) == labels[i] else 1)
            bayes.partial_fit(current_sample_x, list(stream.current_sample_y.ravel()))
            stream.next_sample()

        # Kernel SWMEBWIN
        start = time.time()
        for i in range(RANGE):
            k_swmebwin.add_element(value=data[i], label=labels[i])
            if k_swmebwin.change_detected is True:
                k_swmebwin_drifts.append(i)
        end = time.time() - start

        f1, tp, fp, tn, fn = confusion_matrix_stats(k_swmebwin_drifts, RANGE)
        overall_k_swmebwin_tp += tp
        overall_k_swmebwin_tn += tn
        overall_k_swmebwin_fp += fp
        overall_k_swmebwin_fn += fn
        print(f'F1-Score: {f1}')
        print(f'{tp} true positives, {fp} false positives')
        print(f'{tn} true negatives, {fn} false negatives')
        with open('drifts.txt', 'a+') as f:
            f.write(f'K-SWMEB detected {len(k_swmebwin_drifts)} drifts in {end} {k_swmebwin_drifts}\n\n')
        print(f'K-SW-MEBWIN took {end} sec and detected {len(k_swmebwin_drifts)} drifts\n')

        # SWMEBWIN
        start = time.time()
        for i in range(RANGE):
            swmebwin.add_element(value=data[i], label=labels[i])
            if swmebwin.change_detected is True:
                swmebwin_drifts.append(i)
        end = time.time() - start

        f1, tp, fp, tn, fn = confusion_matrix_stats(swmebwin_drifts, RANGE)
        overall_swmebwin_tp += tp
        overall_swmebwin_tn += tn
        overall_swmebwin_fp += fp
        overall_swmebwin_fn += fn
        print(f'F1-Score: {f1}')
        print(f'{tp} true positives, {fp} false positives')
        print(f'{tn} true negatives, {fn} false negatives')
        with open('drifts.txt', 'a+') as f:
            f.write(f'SWMEB detected {len(swmebwin_drifts)} drifts in {end} {swmebwin_drifts}\n\n')
        print(f'SW-MEBWIN took {end} sec and detected {len(swmebwin_drifts)} drifts\n')

        # ADWIN: drift if any per-dimension detector fires.
        start = time.time()
        for i in range(RANGE):
            adwin_detected = False
            for j in range(data[i].size):
                adwin[j].add_element(data[i][j])
                if adwin[j].detected_change():
                    adwin_detected = True
            if adwin_detected is True:
                adwin_drifts.append(i)
        end = time.time() - start

        f1, tp, fp, tn, fn = confusion_matrix_stats(adwin_drifts, RANGE)
        overall_adwin_tp += tp
        overall_adwin_tn += tn
        overall_adwin_fp += fp
        overall_adwin_fn += fn
        print(f'F1-Score: {f1}')
        print(f'{tp} true positives, {fp} false positives')
        print(f'{tn} true negatives, {fn} false negatives')
        with open('drifts.txt', 'a+') as f:
            f.write(f'ADWIN detected {len(adwin_drifts)} drifts in {end} at {adwin_drifts}\n\n')
        print(f'ADWIN took {end} sec and detected {len(adwin_drifts)} drifts\n')

        # KSWIN: drift if any per-dimension detector fires.
        start = time.time()
        for i in range(RANGE):
            kswin_detected = False
            for j in range(data[i].size):
                kswin[j].add_element(data[i][j])
                if kswin[j].detected_change():
                    kswin_detected = True
            if kswin_detected is True:
                kswin_drifts.append(i)
        end = time.time() - start

        f1, tp, fp, tn, fn = confusion_matrix_stats(kswin_drifts, RANGE)
        overall_kswin_tp += tp
        overall_kswin_tn += tn
        overall_kswin_fp += fp
        overall_kswin_fn += fn
        print(f'F1-Score: {f1}')
        print(f'{tp} true positives, {fp} false positives')
        print(f'{tn} true negatives, {fn} false negatives')
        with open('drifts.txt', 'a+') as f:
            f.write(f'KSWIN detected {len(kswin_drifts)} drifts in {end} at {kswin_drifts}\n\n')
        print(f'KSWIN took {end} sec and detected {len(kswin_drifts)} drifts\n')

        # EDDM: fed with the 0/1 error stream of the NaiveBayes learner.
        start = time.time()
        for i in range(RANGE):
            eddm.add_element(predictions[i])
            if eddm.detected_change():
                eddm_drifts.append(i)
        end = time.time() - start

        f1, tp, fp, tn, fn = confusion_matrix_stats(eddm_drifts, RANGE)
        overall_eddm_tp += tp
        overall_eddm_tn += tn
        overall_eddm_fp += fp
        overall_eddm_fn += fn
        print(f'F1-Score: {f1}')
        print(f'{tp} true positives, {fp} false positives')
        print(f'{tn} true negatives, {fn} false negatives')
        with open('drifts.txt', 'a+') as f:
            f.write(f'EDDM detected {len(eddm_drifts)} drifts in {end} at {eddm_drifts}\n\n')
        print(f'EDDM took {end} sec and detected {len(eddm_drifts)} drifts\n')

        # DDM: fed with the 0/1 error stream of the NaiveBayes learner.
        start = time.time()
        for i in range(RANGE):
            ddm.add_element(predictions[i])
            if ddm.detected_change():
                ddm_drifts.append(i)
        end = time.time() - start

        f1, tp, fp, tn, fn = confusion_matrix_stats(ddm_drifts, RANGE)
        overall_ddm_tp += tp
        overall_ddm_tn += tn
        overall_ddm_fp += fp
        # BUG FIX: original accumulated tn here, corrupting DDM's overall
        # false-negative count (and its overall F1).
        overall_ddm_fn += fn
        print(f'F1-Score: {f1}')
        print(f'{tp} true positives, {fp} false positives')
        print(f'{tn} true negatives, {fn} false negatives')
        with open('drifts.txt', 'a+') as f:
            f.write(f'DDM detected {len(ddm_drifts)} drifts in {end} at {ddm_drifts}\n\n')
        print(f'DDM took {end} sec and detected {len(ddm_drifts)} drifts\n')

    # OVERALL STATISTICS
    print(50 * '-')
    print('K-SWMEBWIN\n')
    print(f'Overall F1: {calc_f1(overall_k_swmebwin_tp, overall_k_swmebwin_fp, overall_k_swmebwin_tn, overall_k_swmebwin_fn)}')
    print(f'{overall_k_swmebwin_tp} true positives, {overall_k_swmebwin_fp} false positives')
    print(f'{overall_k_swmebwin_tn} true negatives, {overall_k_swmebwin_fn} false negatives')
    print(50 * '-')

    print(50 * '-')
    print('SWMEBWIN\n')
    print(f'Overall F1: {calc_f1(overall_swmebwin_tp, overall_swmebwin_fp, overall_swmebwin_tn, overall_swmebwin_fn)}')
    print(f'{overall_swmebwin_tp} true positives, {overall_swmebwin_fp} false positives')
    print(f'{overall_swmebwin_tn} true negatives, {overall_swmebwin_fn} false negatives')
    print(50 * '-')

    print(50 * '-')
    print('KSWIN\n')
    print(f'Overall F1: {calc_f1(overall_kswin_tp, overall_kswin_fp, overall_kswin_tn, overall_kswin_fn)}')
    print(f'{overall_kswin_tp} true positives, {overall_kswin_fp} false positives')
    print(f'{overall_kswin_tn} true negatives, {overall_kswin_fn} false negatives')
    print(50 * '-')

    print(50 * '-')
    print('ADWIN\n')
    print(f'Overall F1: {calc_f1(overall_adwin_tp, overall_adwin_fp, overall_adwin_tn, overall_adwin_fn)}')
    print(f'{overall_adwin_tp} true positives, {overall_adwin_fp} false positives')
    print(f'{overall_adwin_tn} true negatives, {overall_adwin_fn} false negatives')
    print(50 * '-')

    print(50 * '-')
    print('DDM\n')
    print(f'Overall F1: {calc_f1(overall_ddm_tp, overall_ddm_fp, overall_ddm_tn, overall_ddm_fn)}')
    print(f'{overall_ddm_tp} true positives, {overall_ddm_fp} false positives')
    print(f'{overall_ddm_tn} true negatives, {overall_ddm_fn} false negatives')
    print(50 * '-')

    print(50 * '-')
    print('EDDM\n')
    print(f'Overall F1: {calc_f1(overall_eddm_tp, overall_eddm_fp, overall_eddm_tn, overall_eddm_fn)}')
    print(f'{overall_eddm_tp} true positives, {overall_eddm_fp} false positives')
    print(f'{overall_eddm_tn} true negatives, {overall_eddm_fn} false negatives')
    print(50 * '-')