コード例 #1
0
ファイル: test_ddm.py プロジェクト: houcembenmakhlouf/GHVFDT
def test_ddm():
    """
    Check DDM change detection on a synthetic binary stream.

    Two blocks of 1000 samples are drawn from normal distributions (mean 0,
    then mean 0.5, both with sigma 0.1), thresholded at 0 and concatenated as
    integers. The test pins the indices at which the detector reports a change
    and the string returned by ``get_info()``.
    """
    detector = DDM()

    # Build the stream; the seed is fixed so the pinned indices are reproducible.
    np.random.seed(1)
    first_half = np.random.normal(0, 0.1, 1000) > 0
    second_half = np.random.normal(0.5, 0.1, 1000) > 0
    data_stream = np.concatenate((first_half.astype(int), second_half.astype(int)))

    detected_indices = []
    for position, element in enumerate(data_stream):
        detector.add_element(element)
        if detector.detected_change():
            detected_indices.append(position)

    assert detected_indices == [103, 1060]

    assert ddm_info_matches(detector) if False else (
        detector.get_info()
        == "DDM(min_num_instances=None, out_control_level=3.0, warning_level=2.0)"
    )
コード例 #2
0
ファイル: sim_adwin.py プロジェクト: thanapol2/data_stream
def sim_ddm(input_stream, start_point=0):
    """
    Run a fresh DDM detector over ``input_stream``.

    Each element is fed to the detector in order. Returns a pair of lists
    ``(warnings, changes)`` holding the positions (element index plus
    ``start_point``) at which the detector reported a warning zone or a
    change, respectively.
    """
    detector = DDM()
    warning_positions = []
    change_positions = []

    position = 0
    stream_length = len(input_stream)
    while position < stream_length:
        detector.add_element(input_stream[position])
        # Both conditions are checked independently for every element.
        if detector.detected_warning_zone():
            warning_positions.append(position + start_point)
        if detector.detected_change():
            change_positions.append(position + start_point)
        position += 1

    return warning_positions, change_positions
コード例 #3
0
ファイル: ddm.py プロジェクト: farnaz2018/NLP-Project
def ddm_test():
    """
    Replay the stream stored in ``data/stream_acc.npy`` through a DDM detector.

    Every element is fed to the detector (with a tqdm progress bar); a line is
    printed whenever the detector reports a warning zone or a change.
    """
    detector = DDM()
    true_occur_position = 4443  # not used below
    data_stream = np.load("data/stream_acc.npy")

    for idx in tqdm(range(data_stream.shape[0])):
        sample = data_stream[idx]
        detector.add_element(sample)

        if detector.detected_warning_zone():
            warning_msg = 'Warning zone has been detected in data: ' + str(sample)
            print(warning_msg + ' - of index: ' + str(idx))

        if detector.detected_change():
            change_msg = 'Change has been detected in data: ' + str(sample)
            print(change_msg + ' - of index: ' + str(idx))
コード例 #4
0
def test_ddm(test_path):
    """
    Check DDM change detection on the stored ``drift_stream.npy`` fixture.

    The stream is loaded from ``test_path`` and fed to the detector element by
    element; the test pins the single index at which a change is reported.
    """
    detector = DDM()
    data_stream = np.load(os.path.join(test_path, 'drift_stream.npy'))

    detected_indices = []
    for position in range(data_stream.size):
        detector.add_element(data_stream[position])
        if not detector.detected_change():
            continue
        detected_indices.append(position)

    assert detected_indices == [1009]
コード例 #5
0
ファイル: experiment.py プロジェクト: roeicoh1/FinalProject
    def run(self):
        '''
        Main method to simulate a new experiment.

        Iterates over the rows of ``self.X`` / ``self.y`` one record at a time:

        * records before ``self.window_size`` are skipped (they are only
          aggregated);
        * at ``record == self.window_size`` the feature selector / learner is
          initialised via ``self.init_ofs_ol``; if that fails the window is
          enlarged by 50 records and initialisation is retried once the stream
          reaches the new window size;
        * every later record is predicted, the running accuracy and the
          process memory usage are recorded, and a DDM detector is consulted
          to trigger ``self.concept_drift_detection``.

        Any exception raised while running is logged and not propagated.
        '''
        print(f"Starting Experiment:{self}")
        try:
            start_window_size = self.window_size
            num_of_correct_predictions, predictions_counter = 0, 0
            ddm = DDM()
            for record in range(self.X.shape[0]):
                x_record, y_record = np.array([self.X[record, :]]), np.ravel(np.array([self.y[record]]))
                if record < self.window_size:  # aggregate records till window size
                    continue
                elif record == self.window_size:  # first initialization
                    try:
                        self.init_ofs_ol(record)
                    except Exception as e:
                        # case where ofs failed to find features - try to add more records and replay process
                        if self.window_size > start_window_size * 4:
                            # Give up once the window has grown past four times its initial size.
                            raise Exception("OFS could not find features.") from e
                        self.window_size += 50
                        logging.info(f"Changed window size from {self.window_size - 50} to {self.window_size}")
                    continue

                # predict (restricted to the selected features when a feature selector is in use)
                my_pred = self.ol.created_model.predict(
                    x_record) if self.ofs is None else self.ol.created_model.predict(
                    x_record[:, self.current_selected_features])
                predictions_counter += 1
                if y_record[0] == my_pred[0]:
                    num_of_correct_predictions += 1

                # NOTE(review): DDM is fed the running accuracy here rather than a
                # per-record 0/1 error indicator - confirm this is intended.
                ddm.add_element(num_of_correct_predictions / predictions_counter)  # add result to concept drift model
                self.prequential_accuracy.append(num_of_correct_predictions / predictions_counter)  # add accuracy
                self.memory_usage.append(psutil.Process(os.getpid()).memory_info().rss)  # add memory usage

                if self.ol.lazy:  # partial fit for lazy models
                    self.fit_lazy(x_record, y_record)
                if ddm.detected_change():  # check for concept drift
                    self.concept_drift_detection(start_window_size, record)
                elif record != self.X.shape[0] - 1 and self.ofs:
                    # No drift: carry the previous feature selection forward.
                    self.selected_features.append(self.selected_features[-1])
        except Exception as e:
            # Was `except Experiment`, which cannot catch errors unless Experiment is an
            # exception class; catch Exception so failures are actually logged.
            logging.error(f"Error: {str(e)}")
コード例 #6
0
    while n_samples < 20000:
        driftDataX, driftDataY = stream.next_sample()
        my_pred = knn.predict(driftDataX)
        correct = driftDataY[0] == my_pred[0]
        if correct:
            corrects += 1
        n_samples += 1

        adwin.add_element(0 if correct else 1)
        if adwin.detected_change():
            # print('ADWIN', n_samples)
            adwin_results.append(n_samples)

        ddm.add_element(0 if correct else 1)
        if ddm.detected_change():
            # print('DDM', n_samples)
            ddm_results.append(n_samples)

        ph1.add_element(0 if correct else 1)
        if ph1.detected_change():
            # print('PH', n_samples)
            ph1_results.append(n_samples)

        ph2.add_element(0 if correct else 1)
        if ph2.detected_change():
            # print('PH', n_samples)
            ph2_results.append(n_samples)

        kswin1.add_element(corrects / n_samples)
        if kswin1.detected_change():
コード例 #7
0
# Simulate a data stream of 1000 samples drawn from a standard normal distribution
stream = np.random.randn(1000)

# Bare expression: in an interactive session this displays the first ten samples
stream[:10]
## Output-
#array([-1.0856306 ,  0.99734545,  0.2829785 , -1.50629471, -0.57860025,
#        1.65143654, -2.42667924, -0.42891263,  1.26593626, -0.8667404 ])

# Change the data concept: overwrite indices 299 through 599 with random
# integers (np.random.randint excludes the `high` bound, so values are 5..8)
for j in range(299, 600):
    stream[j] = np.random.randint(5, high=9)

# Add the stream elements to the detector one by one and check whether a
# drift or a warning zone is reported after each element.
# NOTE(review): `d2m` is not defined in this snippet - presumably a DDM
# instance created earlier; confirm.
for j in range(1000):
    d2m.add_element(stream[j])
    if d2m.detected_change():
        print('Concept drift detected in data: ' + str(stream[j]) +
              ' - at index: ' + str(j))
    if d2m.detected_warning_zone():
        print('Warning detected in data: ' + str(stream[j]) + ' - at index: ' +
              str(j))

### Output:
#Concept drift detected in data: 1.0693159694243486 - at index: 55
#Concept drift detected in data: 2.0871133595881854 - at index: 88
#Concept drift detected in data: 0.8123413299768204 - at index: 126
#Warning detected in data: 1.3772574828673068 - at index: 158
#Warning detected in data: -0.1431759743261871 - at index: 159
#Warning detected in data: 0.02031599823462459 - at index: 160
#Warning detected in data: -0.19396387055266243 - at index: 161
#Warning detected in data: 0.13402679274666512 - at index: 162
コード例 #8
0
ファイル: conf_matrix.py プロジェクト: foxriver76/coreset-meb
def _append_to_drift_log(text):
    """Append ``text`` to ``drifts.txt``; the file is closed even if the write fails."""
    with open('drifts.txt', 'a+') as log_file:
        log_file.write(text)


def _tile_sample(sample_x, multiply):
    """Return ``sample_x`` concatenated with itself ``multiply`` times along axis 1."""
    tiled = np.array([[]])
    for _ in range(multiply):
        tiled = np.concatenate((tiled, sample_x), axis=1)
    return tiled


def _detect_labelled(detector, data, labels):
    """Feed (value, label) pairs to ``detector``; return indices where ``change_detected`` is True."""
    drifts = []
    for i, (value, label) in enumerate(zip(data, labels)):
        detector.add_element(value=value, label=label)
        if detector.change_detected is True:
            drifts.append(i)
    return drifts


def _detect_per_dimension(detectors, data):
    """Feed dimension ``j`` of each sample to ``detectors[j]``; return sample indices where any fired."""
    drifts = []
    for i, sample in enumerate(data):
        detected = False
        # No early exit: every per-dimension detector must see its value.
        for j in range(sample.size):
            detectors[j].add_element(sample[j])
            if detectors[j].detected_change():
                detected = True
        if detected:
            drifts.append(i)
    return drifts


def _detect_on_errors(detector, predictions):
    """Feed the 0/1 prediction-error stream to ``detector``; return indices where it reports a change."""
    drifts = []
    for i, error in enumerate(predictions):
        detector.add_element(error)
        if detector.detected_change():
            drifts.append(i)
    return drifts


def _evaluate_detector(detect, n_samples, log_template, console_name):
    """Time ``detect()``, score its drift indices, print and log the result.

    ``log_template`` is formatted with ``count``, ``elapsed`` and ``drifts``
    and appended to ``drifts.txt``. Returns ``(tp, fp, tn, fn)`` as produced
    by ``confusion_matrix_stats``.
    """
    start = time.time()
    drifts = detect()
    end = time.time() - start

    f1, tp, fp, tn, fn = confusion_matrix_stats(drifts, n_samples)

    print(f'F1-Score: {f1}')
    print(f'{tp} true positives, {fp} false positives')
    print(f'{tn} true negatives, {fn} false negatives')

    # The logged duration is measured at write time (it also covers the
    # scoring above), while the console line reports the detection loop only.
    _append_to_drift_log(log_template.format(
        count=len(drifts), elapsed=time.time() - start, drifts=drifts))
    print(f'{console_name} took {end} sec and detected {len(drifts)} drifts\n')
    return tp, fp, tn, fn


def _print_overall(title, counts):
    """Print the aggregated confusion counts and overall F1 for one detector."""
    tp, fp, tn, fn = counts
    print(50 * '-')
    print(f'{title}\n')
    print(f'Overall F1: {calc_f1(tp, fp, tn, fn)}')
    print(f'{tp} true positives, {fp} false positives')
    print(f'{tn} true negatives, {fn} false negatives')
    print(50 * '-')


def main():
    """Benchmark several drift detectors on every stream in ``streams``.

    For each stream, ``RANGE`` samples are collected (each sample tiled
    ``multiply`` times along the feature axis) together with the 0/1
    prediction errors of an incrementally trained NaiveBayes model. The
    Kernel-SWMEBWIN, SWMEBWIN, ADWIN, KSWIN, EDDM and DDM detectors are then
    run over the collected data; per-stream results are printed and appended
    to ``drifts.txt``, and aggregated confusion counts / F1 scores are printed
    at the end.
    """
    RANGE = 1000000
    DIM = 50

    # Order in which the overall statistics are printed.
    report_order = ('K-SWMEBWIN', 'SWMEBWIN', 'KSWIN', 'ADWIN', 'DDM', 'EDDM')
    # Per-detector running totals, stored as [tp, fp, tn, fn].
    overall = {name: [0, 0, 0, 0] for name in report_order}

    for stream in streams:
        print(stream.name)
        _append_to_drift_log(f'**{stream.name}**\n\n')

        stream.prepare_for_use()
        stream.next_sample()

        adwin = []
        kswin = []
        ddm = DDM(min_num_instances=30)
        eddm = EDDM()

        data = []
        labels = []
        predictions = []

        swmebwin = SWMEBWIN(classes=stream.target_values, w_size=80, epsilon=0.05)
        k_swmebwin = Kernel_SWMEBWIN(classes=stream.target_values, w_size=80, epsilon=0.05)
        # gamma maybe 1.0 / stream.current_sample_x.shape[1]

        n_rand_dims = DIM - stream.current_sample_x.size
        multiply = n_rand_dims // stream.current_sample_x.size

        # partial fit -> pretrain
        # The sample is tiled exactly like the samples collected below. The
        # previous code reset the buffer on every iteration, so the pre-training
        # sample was never tiled (and was undefined when multiply == 0).
        current_sample_x = _tile_sample(stream.current_sample_x, multiply)

        bayes = NaiveBayes()
        bayes.partial_fit(np.array(current_sample_x), list(stream.current_sample_y.ravel()))

        # One ADWIN and one KSWIN instance per (potential) feature dimension.
        for _ in range(DIM):
            adwin.append(ADWIN(delta=0.002))
            kswin.append(KSWIN(w_size=300, stat_size=30, alpha=0.0001))

        # Collect the tiled samples, labels and prequential 0/1 errors
        # (predict first, then train on the same sample).
        for i in range(RANGE):
            current_sample_x = _tile_sample(stream.current_sample_x, multiply)
            data.append(current_sample_x.ravel())
            labels.append(stream.current_sample_y.ravel()[0])
            predictions.append(0 if bayes.predict(current_sample_x) == labels[i] else 1)
            bayes.partial_fit(current_sample_x, list(stream.current_sample_y.ravel()))
            stream.next_sample()

        # MEBWIN evaluation is currently disabled.

        # (overall key, detection routine, drifts.txt template, console name)
        evaluations = (
            ('K-SWMEBWIN',
             lambda: _detect_labelled(k_swmebwin, data, labels),
             'K-SWMEB detected {count} drifts in {elapsed} {drifts}\n\n',
             'K-SW-MEBWIN'),
            ('SWMEBWIN',
             lambda: _detect_labelled(swmebwin, data, labels),
             'SWMEB detected {count} drifts in {elapsed} {drifts}\n\n',
             'SW-MEBWIN'),
            ('ADWIN',
             lambda: _detect_per_dimension(adwin, data),
             'ADWIN detected {count} drifts in {elapsed} at {drifts}\n\n',
             'ADWIN'),
            ('KSWIN',
             lambda: _detect_per_dimension(kswin, data),
             'KSWIN detected {count} drifts in {elapsed} at {drifts}\n\n',
             'KSWIN'),
            ('EDDM',
             lambda: _detect_on_errors(eddm, predictions),
             'EDDM detected {count} drifts in {elapsed} at {drifts}\n\n',
             'EDDM'),
            ('DDM',
             lambda: _detect_on_errors(ddm, predictions),
             'DDM detected {count} drifts in {elapsed} at {drifts}\n\n',
             'DDM'),
        )

        for name, detect, log_template, console_name in evaluations:
            counts = _evaluate_detector(detect, RANGE, log_template, console_name)
            # Accumulate tp/fp/tn/fn slot by slot; the previous code added DDM's
            # true negatives to its false-negative total.
            for slot, value in enumerate(counts):
                overall[name][slot] += value

    # OVERALL STATISTICS
    for name in report_order:
        _print_overall(name, overall[name])