Ejemplo n.º 1
0
def test_sim_crop(input_stream, file_name, crop_size=0):
    """Run ADWIN over a stream and plot a zoomed window around the first change.

    Every element of ``input_stream`` is fed to a fresh ``ADWIN`` detector.
    The plotted window starts up to 100 samples before the first detected
    change point and ends ``crop_size`` samples after it. Detected change
    points up to the end of that window are marked with dashed red vertical
    lines. The figure is saved as ``image/<file_name>_result_zoom.png`` and
    then shown.

    Parameters
    ----------
    input_stream : sequence
        Indexable and sliceable stream of values.
    file_name : str
        Prefix of the output image file name.
    crop_size : int, optional
        Number of samples kept after the first change point.

    Returns
    -------
    list of int
        Indices at which a change was reported. When the list is empty
        nothing is plotted or saved.
    """
    adwin = ADWIN()
    change_point = []
    for i in range(len(input_stream)):
        adwin.add_element(input_stream[i])
        if adwin.detected_change():
            change_point.append(i)

    if not change_point:
        # Nothing to zoom in on; indexing change_point[0] would raise.
        return change_point

    first_change = change_point[0]
    # Clamp both ends to the stream. A negative start would make the slice
    # wrap around from the end, and an end beyond the stream would leave the
    # x and y series with different lengths.
    start_point_crop = max(first_change - 100, 0)
    end_point_crop = min(first_change + crop_size, len(input_stream))
    end_point_crop = max(end_point_crop, start_point_crop)

    for i in change_point:
        if i <= end_point_crop:
            plt.axvline(i, color='r', linestyle='dashed')

    crop_stream = input_stream[start_point_crop:end_point_crop]
    zoom_xi = list(range(start_point_crop, end_point_crop))
    plt.plot(zoom_xi, crop_stream)
    plt.ylabel('value')
    plt.xlabel('Time')
    fig = plt.gcf()
    fig.set_size_inches(10, 5.5)
    # 'aspect' is not a savefig option, so it is no longer passed.
    plt.savefig(os.path.join('image', file_name + "_result_zoom.png"),
                bbox_inches='tight',
                dpi=200)
    plt.show()
    return change_point
        def learn_from_instance(self, X, y, weight, rhat, parent,
                                parent_branch):
            """Update the node with one sample and track its error with ADWIN.

            Steps, in order: delegate the statistics update to the parent
            class, compute the normalized error of ``rhat``'s prediction for
            ``X``, feed that error to the node's ADWIN detector, and trigger a
            split attempt once enough weight has been seen since the last
            attempt.

            Parameters
            ----------
            X : instance attributes, wrapped in a list for ``rhat.predict``.
            y : true target value.
            weight : instance weight forwarded to the parent class.
            rhat : the owning tree; provides ``predict``,
                ``get_normalized_error``, ``grace_period`` and
                ``_attempt_to_split``.
            parent, parent_branch : forwarded to ``rhat._attempt_to_split``.
            """
            super().learn_from_instance(X, y, weight, rhat)

            predicted_target = rhat.predict([X])[0]
            error = rhat.get_normalized_error(predicted_target, y)

            # Create the detector lazily.
            if self._estimation_error_weight is None:
                self._estimation_error_weight = ADWIN()

            # Snapshot the estimate before the new error is added.
            error_before = self.get_error_estimation()

            self._estimation_error_weight.add_element(error)
            self._error_change = (
                self._estimation_error_weight.detected_change())

            # A detected change is discarded when the error estimate dropped.
            if (self._error_change is True
                    and error_before > self.get_error_estimation()):
                self._error_change = False

            # Attempt a split once grace_period weight has accumulated.
            seen = self.get_weight_seen()
            since_last_attempt = (
                seen - self.get_weight_seen_at_last_split_evaluation())
            if since_last_attempt >= rhat.grace_period:
                rhat._attempt_to_split(self, parent, parent_branch)
                self.set_weight_seen_at_last_split_evaluation(seen)
Ejemplo n.º 3
0
    def learn_one(self, X, y, weight, tree, parent, parent_branch):
        """Learn from one sample while monitoring the node's error with ADWIN.

        The prediction and its normalized error are computed before the node
        statistics are updated. When ``tree.bootstrap_sampling`` is set, the
        weight is multiplied by a Poisson(1.0) draw if that draw is positive.
        A split attempt is made once ``tree.grace_period`` weight has been
        seen since the last attempt.

        Parameters
        ----------
        X : instance attributes.
        y : true target.
        weight : instance weight.
        tree : the owning tree; provides ``bootstrap_sampling``,
            ``grace_period`` and ``_attempt_to_split``.
        parent, parent_branch : forwarded to ``tree._attempt_to_split``.
        """
        prediction = self.predict_one(X, tree=tree)
        normalized_error = get_normalized_error(y, prediction, self)

        if tree.bootstrap_sampling:
            # Bootstrap sampling: scale the weight by a positive Poisson draw.
            draw = self._random_state.poisson(1.0)
            if draw > 0:
                weight = weight * draw

        # Create the detector lazily.
        if self._adwin is None:
            self._adwin = ADWIN()

        # Snapshot the estimate before the new error is added.
        error_before = self.error_estimation

        self._adwin.add_element(normalized_error)
        self._error_change = self._adwin.detected_change()

        # A detected change is discarded when the error estimate dropped.
        if self._error_change and error_before > self.error_estimation:
            self._error_change = False

        # Update the node statistics through the parent class.
        super().learn_one(X, y, weight=weight, tree=tree)

        seen = self.total_weight
        if seen - self.last_split_attempt_at >= tree.grace_period:
            tree._attempt_to_split(self, parent, parent_branch)
            self.last_split_attempt_at = seen
 def __init__(self, split_test, class_observations):
     """Create the node with an ADWIN error monitor and a seeded generator.

     ``split_test`` and ``class_observations`` are forwarded unchanged to
     the parent constructor.
     """
     super().__init__(split_test, class_observations)

     # The seed is fixed to 1 in this constructor.
     self._random_seed = 1
     self._classifier_random = check_random_state(self._random_seed)

     self._alternate_tree = None
     self.error_change = False
     self._estimation_error_weight = ADWIN()
    def __init__(self,
                 nb_ensemble=10,
                 max_features='auto',
                 disable_weighted_vote=False,
                 lambda_value=6,
                 performance_metric='acc',
                 drift_detection_method: BaseDriftDetector = ADWIN(0.001),
                 warning_detection_method: BaseDriftDetector = ADWIN(0.01),
                 max_byte_size=33554432,
                 memory_estimate_period=2000000,
                 grace_period=50,
                 split_criterion='info_gain',
                 split_confidence=0.01,
                 tie_threshold=0.05,
                 binary_split=False,
                 stop_mem_management=False,
                 remove_poor_atts=False,
                 no_preprune=False,
                 leaf_prediction='nba',
                 nb_threshold=0,
                 nominal_attributes=None,
                 random_state=None):
        """Store the Adaptive Random Forest configuration.

        The drift and warning detectors are kept only when they are
        ``BaseDriftDetector`` instances; any other value (including ``None``)
        stores ``None`` for that detector.

        Raises
        ------
        ValueError
            If ``performance_metric`` is neither ``'acc'`` nor ``'kappa'``.
        """
        super().__init__()

        # Ensemble-level settings.
        self.nb_ensemble = nb_ensemble
        self.max_features = max_features
        self.disable_weighted_vote = disable_weighted_vote
        self.lambda_value = lambda_value

        # NOTE(review): the default detector objects are created once, when
        # the function is defined, so they are the same objects for every
        # call that omits these arguments.
        self.drift_detection_method = (
            drift_detection_method
            if isinstance(drift_detection_method, BaseDriftDetector)
            else None)
        self.warning_detection_method = (
            warning_detection_method
            if isinstance(warning_detection_method, BaseDriftDetector)
            else None)

        # Run-time state.
        self.instances_seen = 0
        self._train_weight_seen_by_model = 0.0
        self.ensemble = None
        self.random_state = check_random_state(random_state)

        if performance_metric not in ('acc', 'kappa'):
            raise ValueError(
                'Invalid performance metric: {}'.format(performance_metric))
        self.performance_metric = performance_metric

        # Hoeffding Tree configuration used by the forest.
        self.max_byte_size = max_byte_size
        self.memory_estimate_period = memory_estimate_period
        self.grace_period = grace_period
        self.split_criterion = split_criterion
        self.split_confidence = split_confidence
        self.tie_threshold = tie_threshold
        self.binary_split = binary_split
        self.stop_mem_management = stop_mem_management
        self.remove_poor_atts = remove_poor_atts
        self.no_preprune = no_preprune
        self.leaf_prediction = leaf_prediction
        self.nb_threshold = nb_threshold
        self.nominal_attributes = nominal_attributes
Ejemplo n.º 6
0
class AdaptiveTree(object):
    """A tree bundled with its warning/drift detectors and kappa bookkeeping."""

    def __init__(self,
                 tree,
                 kappa_window,
                 warning_delta,
                 drift_delta,
                 tree_pool_id=-1):
        """Wrap ``tree`` with two ADWIN detectors and a bounded label buffer.

        ``warning_delta`` and ``drift_delta`` are passed to the respective
        ``ADWIN`` constructors. ``kappa_window`` bounds the deque of predicted
        labels and is the fill level required before kappa is computed.
        """
        self.tree = tree
        self.tree_pool_id = tree_pool_id

        # Background / candidate bookkeeping.
        self.bg_adaptive_tree = None
        self.is_candidate = False

        self.warning_detector = ADWIN(warning_delta)
        self.drift_detector = ADWIN(drift_delta)

        # Kappa starts at the sentinel -sys.maxsize.
        self.kappa_window = kappa_window
        self.predicted_labels = deque(maxlen=kappa_window)
        self.kappa = -sys.maxsize

    def update_kappa(self, actual_labels):
        """Recompute, store and return kappa.

        While fewer than ``kappa_window`` predictions are buffered the value
        is ``-sys.maxsize``; otherwise it is the Cohen kappa score between
        ``actual_labels`` and the buffered predictions.
        """
        window_is_full = len(self.predicted_labels) >= self.kappa_window
        if window_is_full:
            self.kappa = cohen_kappa_score(actual_labels,
                                           self.predicted_labels)
        else:
            self.kappa = -sys.maxsize
        return self.kappa

    def reset(self):
        """Reset both detectors, the label buffer and the bookkeeping flags."""
        self.bg_adaptive_tree = None
        self.is_candidate = False
        for detector in (self.warning_detector, self.drift_detector):
            detector.reset()
        self.predicted_labels.clear()
        self.kappa = -sys.maxsize
Ejemplo n.º 7
0
def _collect_drift_indices(values, drift_detector, window):
    """Feed ``values`` to ``drift_detector``; return drift offsets past ``window``."""
    indices = []
    for i, value in enumerate(values):
        drift_detector.add_element(float(value))
        # detected_change() is queried for every element, including the
        # baseline part; only hits at or after `window` are reported.
        if drift_detector.detected_change() and i >= window:
            indices.append(i - window)
    return indices


def perform_drift_detection(predict_dataframe,
                            dataframe,
                            feature_names,
                            detector,
                            drift_notification,
                            token="") -> str:
    """Detect per-feature data drift in new rows relative to baseline rows.

    The baseline rows (``dataframe``) followed by the new rows
    (``predict_dataframe``) are streamed, one feature at a time, through a
    fresh detector instance. Only changes reported at or after the first new
    row are kept, expressed as row offsets within ``predict_dataframe``.

    Parameters
    ----------
    predict_dataframe : DataFrame holding the new rows.
    dataframe : DataFrame holding the baseline rows.
    feature_names : column names applied to the combined rows.
    detector : str
        One of ``"HDDM"``, ``"Page Hinkley"`` or ``"ADWIN"``. Any other value
        is logged and yields an empty result.
    drift_notification : when truthy, a web notification is submitted for
        every feature in which drift was found.
    token : str, optional
        Passed to the logging and notification helpers.

    Returns
    -------
    str
        JSON object mapping each drifted feature name to its list of row
        offsets.
    """
    log("[INFO] Calling perform_drift_detection", token)
    log("[INFO] Selected data drift detection method: " + detector)

    baseline_data = dataframe.values.tolist()
    predict_data = predict_dataframe.values.tolist()
    overall_dataframe = pd.DataFrame(baseline_data + predict_data,
                                     columns=feature_names)
    window = len(baseline_data)

    detector_factories = {
        "HDDM": HDDM_W,
        "Page Hinkley": PageHinkley,
        "ADWIN": ADWIN,
    }
    detector_factory = detector_factories.get(detector)
    drifts = dict()
    if detector_factory is None:
        # Previously an unknown name was silently ignored.
        log("[ERROR] Unknown data drift detection method: " + detector)
        return json.dumps(drifts, cls=NpEncoder)

    for feature in feature_names:
        # A new detector per feature so that state never leaks across columns.
        detected_drifts_indices = _collect_drift_indices(
            overall_dataframe[feature], detector_factory(), window)
        if not detected_drifts_indices:
            continue

        log("[INFO] Data drift detected in feature: " + feature)
        log("[INFO] The drifted rows are: " + str(detected_drifts_indices))
        drifts[feature] = detected_drifts_indices
        if drift_notification:
            log("[INFO] Sending a web notification", token)
            message = "MaaS data drift detected from " + get_token_user(
                token) + " (" + token + ")"
            if submit_web_notification(message, token):
                log("[INFO] Web notification sent!")
            else:
                log("[ERROR] Error occurred while sending a web notification"
                    )
    return json.dumps(drifts, cls=NpEncoder)
Ejemplo n.º 8
0
    def __init__(self, split_test, class_observations, random_state=None):
        """Create the node with an ADWIN error monitor.

        ``split_test`` and ``class_observations`` go to the parent
        constructor; ``random_state`` is resolved via ``check_random_state``.
        """
        super().__init__(split_test, class_observations)
        self.random_state = check_random_state(random_state)

        self._alternate_tree = None
        self.error_change = False
        self._estimation_error_weight = ADWIN()

        # Running bounds used to normalize observed errors into [0, 1].
        self._max_error = float('-Inf')
        self._min_error = float('Inf')
Ejemplo n.º 9
0
    def __init__(self,
                 initial_stats=None,
                 parent_node=None,
                 random_state=None):
        """Create the node with an ADWIN detector and a cleared change flag.

        All three arguments are forwarded to the parent constructor.
        """
        super().__init__(initial_stats, parent_node, random_state)

        # Counter for normalizing the values monitored by the drift detector
        # (the original comment refers to Welford's algorithm).
        self._n = 0

        self._error_change = False
        self._adwin = ADWIN()
 def __init__(self,
              initial_class_observations,
              perceptron_weight,
              random_state=None):
     """Create the node with an ADWIN error monitor and a seeded generator.

     All three arguments are forwarded to the parent constructor.
     """
     super().__init__(initial_class_observations, perceptron_weight,
                      random_state)

     # The seed is fixed to 1 in this constructor, independent of
     # ``random_state``.
     self._randomSeed = 1
     self._classifier_random = check_random_state(self._randomSeed)

     self._error_change = False
     self._estimation_error_weight = ADWIN()
Ejemplo n.º 11
0
 def __init__(self,
              k=5,
              max_window_size=sys.maxsize,
              leaf_size=30,
              categorical_list=None):
     """Create the KNN model with an attached ADWIN change detector.

     Parameters
     ----------
     k, max_window_size, leaf_size :
         Forwarded unchanged to the parent constructor.
     categorical_list : list, optional
         Forwarded to the parent constructor. When omitted, a new empty
         list is created for this instance; the previous ``[]`` default was
         a single list object shared by every instance.
     """
     if categorical_list is None:
         categorical_list = []
     super().__init__(k=k,
                      max_window_size=max_window_size,
                      leaf_size=leaf_size,
                      categorical_list=categorical_list)
     self.adwin = ADWIN()
     # Assigned after the parent constructor has run, as in the original.
     self.window = None
Ejemplo n.º 12
0
def sim_adwin(input_stream, start_point=0):
    """Return the positions at which ADWIN (delta=0.3) reports a change.

    Each element of ``input_stream`` is fed to the detector in order.
    ``start_point`` is added to every reported index.
    """
    detector = ADWIN(delta=.3)
    change_point = []
    for position in range(len(input_stream)):
        detector.add_element(input_stream[position])
        if not detector.detected_change():
            continue
        change_point.append(position + start_point)

    return change_point
    def __init__(self,
                 initial_class_observations,
                 parent_node,
                 random_state=None):
        """Create the node with an ADWIN error monitor.

        All three arguments are forwarded to the parent constructor.
        """
        super().__init__(initial_class_observations, parent_node, random_state)

        self._error_change = False
        self._estimation_error_weight = ADWIN()

        # Running bounds used to normalize observed errors into [0, 1].
        self._max_error = float('-Inf')
        self._min_error = float('Inf')
Ejemplo n.º 14
0
  def __init__(self, window_size=100, n_estimators=25, anomaly_threshold=0.5,
               drift_threshold=0.5, random_state=None, version="AnomalyRate",
               # Parameters for partial model update
               n_estimators_updated=0.5, updated_randomly=True,
               # Parameters for NDKSWIN
               alpha=0.01, data=None, n_dimensions=1, n_tested_samples=0.1,
               fixed_checked_dimension=False, fixed_checked_sample=False):
      """Store the detector configuration and initialise its run-time state.

      ``n_estimators_updated`` is a fraction in (0, 1]; it is converted into
      a tree count as ``int(n_estimators * n_estimators_updated)``. ``data``
      is accepted but not used by this constructor.

      Raises
      ------
      ValueError
          If ``n_estimators_updated`` is ``<= 0`` or ``> 1``.
      """
      super().__init__()

      # Core model settings.
      self.n_estimators = n_estimators
      self.ensemble = None
      self.random_state = random_state
      self.window_size = window_size
      self.samples_seen = 0
      self.anomaly_rate = 0.20
      self.anomaly_threshold = anomaly_threshold
      self.drift_threshold = drift_threshold

      # Current and previous windows.
      self.window = None
      self.prec_window = None

      self.cpt = 0
      self.version = version

      # Update logs; each starts with a header entry naming the version.
      # model_update records 0 (not updated) / 1 (updated).
      self.model_update = [version]
      self.model_update_windows = ["samples_seen_" + version]

      # Number of trees recomputed when the model is updated on a new window.
      self.n_estimators_updated = int(self.n_estimators * n_estimators_updated)
      if n_estimators_updated <= 0.0 or n_estimators_updated > 1.0:
          raise ValueError("n_estimators_updated must be > 0 and <= 1")

      # True: trees are chosen randomly. False: the first
      # (n_estimators - int(n_estimators * n_estimators_updated)) are picked.
      self.updated_randomly = updated_randomly

      # NDKSWIN settings.
      self.alpha = alpha
      self.n_dimensions = n_dimensions
      self.n_tested_samples = n_tested_samples
      self.fixed_checked_dimension = fixed_checked_dimension
      self.fixed_checked_sample = fixed_checked_sample
      self.first_time_fit = True

      # TODO Maurras 27112020: Find a way to optimize the use of ADWIN()
      self.adwin = ADWIN()
Ejemplo n.º 15
0
def cp_detection_ADWIN(points):
    """Detect change points in ``points`` with ADWIN, print and plot them.

    Every detection is printed together with the value at that index. The
    points and the detected indices are then drawn with ``rpt.show.display``
    and the figure is shown. Nothing is returned.
    """
    from skmultiflow.drift_detection.adwin import ADWIN

    detector = ADWIN()
    detections = []
    for idx in range(len(points)):
        # Feed the stream element by element and check for drift each time.
        detector.add_element(points[idx])
        if not detector.detected_change():
            continue
        detections.append(idx)
        print('Change detected in data: ' + str(points[idx]) +
              ' - at index: ' + str(idx))

    rpt.show.display(points, detections, figsize=(10, 6))
    plt.title('Change Point Detection: ADWIN')
    plt.show()
Ejemplo n.º 16
0
 def reset(self):
     """Reset the ADWIN detector and the base model kept by the parent class.

     A new ``ADWIN`` instance replaces the current one before the parent's
     ``reset`` is invoked.

     Returns
     -------
     KNNAdwin
         The value returned by the parent ``reset`` (documented as self).
     """
     self.adwin = ADWIN()
     parent_result = super().reset()
     return parent_result
Ejemplo n.º 17
0
 def __init__(self,
              tree,
              kappa_window,
              warning_delta,
              drift_delta,
              tree_pool_id=-1):
     """Wrap ``tree`` with two ADWIN detectors and a bounded label buffer.

     ``warning_delta`` and ``drift_delta`` are passed to the respective
     ``ADWIN`` constructors; ``kappa_window`` bounds the deque of predicted
     labels.
     """
     self.tree = tree
     self.tree_pool_id = tree_pool_id

     # Background / candidate bookkeeping.
     self.bg_adaptive_tree = None
     self.is_candidate = False

     self.warning_detector = ADWIN(warning_delta)
     self.drift_detector = ADWIN(drift_delta)

     # Kappa starts at the sentinel -sys.maxsize.
     self.kappa_window = kappa_window
     self.predicted_labels = deque(maxlen=kappa_window)
     self.kappa = -sys.maxsize
Ejemplo n.º 18
0
    def concept_drift_detection(self, X, Y):
        """Stream every column of ``X`` through its own drift detector.

        On the first call (while ``init_drift_detection`` is truthy) one
        detector per row of ``X.T`` is created, of the kind named by
        ``self.drift_detector``. Each value is then fed to the detector of
        its column; every positive detection sets ``self.drift_detected`` and
        increments ``self.n_detections``. ``Y`` is not used.

        Returns
        -------
        bool
            ``self.drift_detected`` after processing ``X``.
        """
        if self.init_drift_detection:
            kind = self.drift_detector
            if kind == "KSWIN":
                self.cdd = [
                    KSWIN(w_size=100, stat_size=30, alpha=self.confidence)
                    for _ in X.T
                ]
            if kind == "ADWIN":
                self.cdd = [ADWIN() for _ in X.T]
            if kind == "DDM":
                self.cdd = [DDM() for _ in X.T]
            if kind == "EDDM":
                self.cdd = [EDDM() for _ in X.T]
            self.init_drift_detection = False

        self.drift_detected = False

        if not self.init_drift_detection:
            for column, detector in zip(X.T, self.cdd):
                for value in column:
                    detector.add_element(value)
                    if not detector.detected_change():
                        continue
                    self.drift_detected = True
                    self.n_detections += 1

        return self.drift_detected


# if name=="__main__":
#     from skmultiflow import
Ejemplo n.º 19
0
    def concept_drift_detection(self, X, Y):
        """Check ``X`` for concept drift with the configured detector kind.

        On the first call (while ``init_drift_detection`` is truthy) the
        detectors are created: one per row of ``X.T`` for KSWIN / ADWIN / DDM
        / EDDM, or a single ``KSVEC`` sized by ``X.shape[1]``. For ``KSVEC``
        the whole batch is passed in one call; otherwise each value goes to
        its column's detector and every detection increments
        ``self.n_detections``. ``Y`` is not used.

        Returns
        -------
        bool
            ``self.drift_detected`` after processing ``X``.
        """
        if self.init_drift_detection:
            kind = self.drift_detector
            if kind == "KSWIN":
                self.cdd = [
                    KSWIN(w_size=100, stat_size=30, alpha=self.confidence)
                    for _ in X.T
                ]
            if kind == "ADWIN":
                self.cdd = [ADWIN() for _ in X.T]
            if kind == "DDM":
                self.cdd = [DDM() for _ in X.T]
            if kind == "EDDM":
                self.cdd = [EDDM() for _ in X.T]
            if kind == "KSVEC":
                self.cdd = KSVEC(vec_size=X.shape[1])
            self.init_drift_detection = False

        self.drift_detected = False

        if not self.init_drift_detection:
            if self.drift_detector == "KSVEC":
                # The vector detector receives the whole batch at once; this
                # path does not touch n_detections.
                self.cdd.add_element(X)
                if self.cdd.detected_change():
                    self.drift_detected = True
            else:
                for column, detector in zip(X.T, self.cdd):
                    for value in column:
                        detector.add_element(value)
                        if not detector.detected_change():
                            continue
                        self.drift_detected = True
                        self.n_detections += 1

        return self.drift_detected
Ejemplo n.º 20
0
 def __init__(self, delta=0.002):
     """Initialize the ADWIN algorithm wrapper.

     ``delta`` is stored and also passed to the ``ADWIN`` constructor.
     """
     self.delta = delta
     self.name = 'ADWIN'
     self.model = ADWIN(delta)

     # Result buffers, empty at construction.
     self.warning_zones_indexes = []
     self.change_indexes = []
Ejemplo n.º 21
0
    def concept_drift_detection(self, X, Y):
        """Check the batch ``(X, Y)`` for concept drift.

        On the first call (while ``init_drift_detection`` is truthy) the
        detectors are created: one per row of ``X.T`` for ``"KS"`` and
        ``"ADWIN"``, one per class for ``"DIST"``. In ``"DIST"`` mode each
        class's ``calcDistances`` result between its prototypes and its
        samples is fed to the detectors in order; any exception on that path
        is caught and reported with a printed warning. In the other modes
        each value is fed to its column's detector.

        Returns
        -------
        bool
            ``self.drift_detected`` after processing the batch.
        """
        if self.init_drift_detection:
            kind = self.drift_detector
            if kind == "KS":
                self.cdd = [KSWIN(alpha=self.confidence, w_size=self.window_size) for _ in X.T]
            if kind == "ADWIN":
                self.cdd = [ADWIN(delta=self.confidence) for _ in X.T]
            if kind == "DIST":
                self.cdd = [KSWIN(self.confidence, w_size=self.window_size) for _ in self.classes_]
        # Cleared unconditionally, whether or not detectors were just built.
        self.init_drift_detection = False
        self.drift_detected = False

        if self.drift_detector == "DIST":
            try:
                prototypes_per_class = [self.w_[self.c_w_ == label] for label in self.classes_]
                new_distances = {
                    label: self.calcDistances(pts, X[Y == label])
                    for label, pts in zip(self.classes_, prototypes_per_class)
                }
                for distance, detector in zip(new_distances.values(), self.cdd):
                    detector.add_element(distance)
                    if detector.detected_change():
                        self.drift_detected = True
            except Exception:
                # The original comments attribute this to a batch in which
                # not every label is present (ValueError: zero-size array to
                # reduction operation maximum which has no identity).
                print("Warning: Current Batch does not contain all labels!")
        elif not self.init_drift_detection:
            for column, detector in zip(X.T, self.cdd):
                for value in column:
                    detector.add_element(value)
                    if detector.detected_change():
                        self.drift_detected = True

        return self.drift_detected
Ejemplo n.º 22
0
 def __adjust_ensemble_size(self):
     """Grow the ensemble until it has one member per known class.

     For every missing member a deep copy of ``self.h`` and a new ``ADWIN``
     are appended and ``ensemble_length`` is incremented. Nothing happens
     when the ensemble is already at least as large as ``self.classes``.
     """
     missing = len(self.classes) - len(self.ensemble)
     if missing <= 0:
         return
     for _ in range(missing):
         self.ensemble.append(cp.deepcopy(self.h))
         self.adwin_ensemble.append(ADWIN())
         self.ensemble_length += 1
Ejemplo n.º 23
0
 def __adjust_ensemble_size(self):
     """Grow the ensemble until it has one member per known class.

     For every missing member a deep copy of ``self.base_estimator`` and a
     new ``ADWIN(self.delta)`` are appended and ``n_estimators`` is
     incremented. Nothing happens when the ensemble is already at least as
     large as ``self.classes``.
     """
     missing = len(self.classes) - len(self.ensemble)
     if missing <= 0:
         return
     for _ in range(missing):
         self.ensemble.append(cp.deepcopy(self.base_estimator))
         self.adwin_ensemble.append(ADWIN(self.delta))
         self.n_estimators += 1
Ejemplo n.º 24
0
    def __init__(self,
                 h=KNN(),
                 ensemble_length=2,
                 w=6,
                 delta=0.002,
                 enable_code_matrix=False,
                 leverage_algorithm='leveraging_bag'):
        """Create the ensemble configuration and one ADWIN per member.

        All attributes are first set to ``None`` and then filled in by the
        private ``__configure`` call. Afterwards ``adwin_ensemble`` is
        rebuilt with ``ensemble_length`` detectors created as
        ``ADWIN(self.delta)``.
        """
        super().__init__()

        # NOTE(review): the default ``h`` is created once, when the function
        # is defined, so it is the same object for every call omitting it.
        self.h = h.reset()

        # Placeholders; __configure assigns the real values.
        self.ensemble_length = self.ensemble = self.adwin_ensemble = None
        self.n_detected_changes = None
        self.matrix_codes = self.enable_matrix_codes = None
        self.w = self.delta = None
        self.classes = None
        self.leveraging_algorithm = None

        self.__configure(h, ensemble_length, w, delta, enable_code_matrix,
                         leverage_algorithm)
        self.init_matrix_codes = True

        # Replaces whatever __configure stored under this name.
        self.adwin_ensemble = [
            ADWIN(self.delta) for _ in range(ensemble_length)
        ]
def make_detector(warn=False, s=1e-5, drift_detector='adwin'):
    """Create a drift detector by name.

    ``'adwin'`` yields ``ADWIN(delta=s)``, or ``ADWIN(delta=s * 10)`` when
    ``warn`` is truthy. ``'EDDM'`` and ``'DDM'`` yield default-constructed
    detectors. Any other name yields ``None``.
    """
    # Computed up front regardless of the detector kind, as in the original.
    sensitivity = s * 10 if warn else s

    if drift_detector == 'adwin':
        detector = ADWIN(delta=sensitivity)
    elif drift_detector == 'EDDM':
        detector = EDDM()
    elif drift_detector == 'DDM':
        detector = DDM()
    else:
        detector = None
    return detector
Ejemplo n.º 26
0
def test_adwin(test_path):
    """
    ADWIN drift detection test.

    Loads ``drift_stream.npy`` from ``test_path``, feeds it element by element
    to a default ``ADWIN`` detector and checks the indices at which a change
    is reported.

    The first half of the stream contains a sequence corresponding to a normal
    distribution of integers from 0 to 1. From index 999 to 1999 the sequence
    is a normal distribution of integers from 0 to 7.
    """
    expected_indices = [1023, 1055, 1087, 1151]

    data_stream = np.load(os.path.join(test_path, 'drift_stream.npy'))
    detector = ADWIN()

    detected_indices = []
    for idx in range(data_stream.size):
        detector.add_element(data_stream[idx])
        if not detector.detected_change():
            continue
        detected_indices.append(idx)

    assert detected_indices == expected_indices
        def learn_from_instance(self, X, y, weight, hat, parent,
                                parent_branch):
            """Update the node with one sample and track its error with ADWIN.

            The class is predicted from the node's current votes, the 0/1
            error is fed to the node's ADWIN detector, the statistics update
            is delegated to the parent class, and a split attempt is made
            once ``hat.grace_period`` weight has been seen since the last
            attempt.

            Parameters
            ----------
            X : instance attributes.
            y : true class label.
            weight : instance weight forwarded to the parent class.
            hat : the owning tree; provides ``grace_period`` and
                ``_attempt_to_split`` and is passed to ``get_class_votes``.
            parent, parent_branch : forwarded to ``hat._attempt_to_split``.
            """
            # The Poisson draw is kept although its result is unused (the
            # weight scaling was disabled in the original code), so the
            # generator still advances by one draw per call.
            self._classifier_random.poisson(1.0)

            votes = self.get_class_votes(X, hat)
            class_prediction = get_max_value_key(votes)

            # Truthiness, not identity: comparing numpy scalars yields a
            # numpy.bool_, for which ``is True`` is always False, so every
            # prediction would have been recorded as an error.
            is_correct = bool(y == class_prediction)

            # Create the detector lazily.
            if self.estimationErrorWeight is None:
                self.estimationErrorWeight = ADWIN()

            # Snapshot the estimate before the new error is added.
            old_error = self.get_error_estimation()

            self.estimationErrorWeight.add_element(0.0 if is_correct else 1.0)
            self.ErrorChange = self.estimationErrorWeight.detected_change()

            # A detected change is discarded when the error estimate dropped.
            if self.ErrorChange and old_error > self.get_error_estimation():
                self.ErrorChange = False

            # Update the node statistics through the parent class.
            super().learn_from_instance(X, y, weight, hat)

            # Attempt a split once grace_period weight has accumulated.
            weight_seen = self.get_weight_seen()
            if (weight_seen - self.get_weight_seen_at_last_split_evaluation()
                    >= hat.grace_period):
                hat._attempt_to_split(self, parent, parent_branch)
                self.set_weight_seen_at_last_split_evaluation(weight_seen)
Ejemplo n.º 28
0
    def __init__(self, h=KNNAdwin(), ensemble_length=2):
        """Create the ensemble configuration and one ADWIN per member.

        Attributes are first set to ``None``, ``h.reset()`` is stored, and
        the private ``__configure`` call fills in the configuration.
        ``adwin_ensemble`` then receives ``ensemble_length`` default
        ``ADWIN`` detectors.
        """
        super().__init__()

        # Placeholders; __configure assigns the real values.
        self.ensemble = self.ensemble_length = self.classes = None

        # NOTE(review): the default ``h`` is created once, when the function
        # is defined, so it is the same object for every call omitting it.
        self.h = h.reset()
        self.__configure(h, ensemble_length)

        self.adwin_ensemble = [ADWIN() for _ in range(ensemble_length)]
Ejemplo n.º 29
0
 def __configure(self):
     """(Re)build the ensemble and its detectors from the stored settings.

     The base estimator is reset, ``n_estimators`` is restored from
     ``_init_n_estimators``, and the ensemble is filled with deep copies of
     the base estimator, each paired with an ``ADWIN(self.delta)``. The
     random state, change counter, class list and matrix-code flag are
     re-initialised.
     """
     self.base_estimator.reset()
     self.n_estimators = self._init_n_estimators

     size = self.n_estimators
     self.ensemble = [cp.deepcopy(self.base_estimator) for _ in range(size)]
     self.adwin_ensemble = [ADWIN(self.delta) for _ in range(size)]

     self.random_state = check_random_state(self._init_random_state)
     self.n_detected_changes = 0
     self.classes = None
     self.init_matrix_codes = True
Ejemplo n.º 30
0
def get_ARF_HAT():
    """Build a ``TS_ARFHoeffdingTree`` with a fixed configuration.

    Only values the constructor call consumes are kept. The previous version
    also created ``disable_weighted_vote``, ``lambda_value``,
    ``performance_metric`` and two ``ADWIN`` detectors that were never passed
    anywhere; those unused locals are removed.

    Returns
    -------
    TS_ARFHoeffdingTree
        The configured classifier.
    """
    return TS_ARFHoeffdingTree(
        max_byte_size=33554432,
        memory_estimate_period=2000000,
        grace_period=50,
        split_criterion='info_gain',
        split_confidence=0.01,
        tie_threshold=0.05,
        binary_split=False,
        stop_mem_management=False,
        remove_poor_atts=False,
        no_preprune=False,
        leaf_prediction='nba',
        nb_threshold=0,
        nominal_attributes=None,
        max_features=3,
        random_state=None)