Exemple #1
0
def unsupervised_analysis(df, nu, size, percent):
    stream = DataStream(df)
    stream.prepare_for_use()
    stream_clf = HoeffdingTree()
    stream_acc = []
    stream_record = []
    stream_true= 0
    buffer = dataBuffer(size, stream.n_features, percent)
    clf = svm.OneClassSVM(nu=nu, kernel="rbf", gamma='auto')
    
    #
    start = time.time()
    X,y = stream.next_sample(size)
    stream_clf.partial_fit(X,y, classes=stream.target_values)
    clf.fit(X)
    
    i=0
    while(stream.has_more_samples()): #stream.has_more_samples()
        X,y = stream.next_sample()
        if buffer.isEmpty():
            buffer.addInstance(X,y,clf.predict(X))
            y_hat = stream_clf.predict(X)
            stream_true = stream_true + check_true(y, y_hat)
            stream_clf.partial_fit(X,y)
            stream_acc.append(stream_true / (i+1))
            stream_record.append(check_true(y,y_hat))
            
        else:
            if buffer.driftCheck():             #detected
                #print("concept drift detected at {}".format(i))
                #retrain the model
                stream_clf.reset()
                #stream_clf = HoeffdingTree()
                stream_clf.partial_fit(buffer.getCurrentData(), buffer.getCurrentLabels(), classes=stream.target_values)
                #update one-class SVM
                clf.fit(buffer.getCurrentData())
                #evaluate and update the model
                y_hat = stream_clf.predict(X)
                stream_true = stream_true + check_true(y, y_hat)
                stream_clf.partial_fit(X,y)
                stream_acc.append(stream_true / (i+1))
                stream_record.append(check_true(y,y_hat))
                #add new sample to the window
                buffer.addInstance(X,y,clf.predict(X))
            else:
                #evaluate and update the model
                y_hat = stream_clf.predict(X)
                stream_true = stream_true + check_true(y, y_hat)
                stream_clf.partial_fit(X,y)
                stream_acc.append(stream_true / (i+1))
                stream_record.append(check_true(y,y_hat))
                #add new sample to the window
                buffer.addInstance(X,y,clf.predict(X))    
        i = i + 1
    #print(buffer.drift_count)
    
    elapsed = format(time.time() - start, '.4f')
    acc = format(stream_acc[-1] * 100, '.4f')
    final_accuracy = "Parameters: {}, {}, {}, Final accuracy: {}, Elapsed time: {}".format(nu,size,percent,acc,elapsed)
    return final_accuracy, stream_record
Exemple #2
0
class cdht(ClassifierMixin, BaseEstimator):
    def __init__(self, alpha=0.001, drift_detector="KSWIN"):
        self.classifier = HoeffdingTree()
        self.init_drift_detection = True
        self.drift_detector = drift_detector.upper()
        self.confidence = alpha
        self.n_detections = 0

    def partial_fit(self, X, y, classes=None):
        """
            Calls the MultinomialNB partial_fit from sklearn.
            ----------
            x : array-like, shape = [n_samples, n_features]
              Training vector, where n_samples in the number of samples and
              n_features is the number of features.
            y : array, shape = [n_samples]
              Target values (integers in classification, real numbers in
              regression)
            Returns
            --------
            """
        if self.concept_drift_detection(X, y):
            self.classifier.reset()

        self.classifier.partial_fit(X, y, classes)
        return self

    def predict(self, X):
        return self.classifier.predict(X)

    def concept_drift_detection(self, X, Y):
        if self.init_drift_detection:
            if self.drift_detector == "KSWIN":
                self.cdd = [
                    KSWIN(w_size=100, stat_size=30, alpha=self.confidence)
                    for elem in X.T
                ]
            if self.drift_detector == "ADWIN":
                self.cdd = [ADWIN() for elem in X.T]
            if self.drift_detector == "DDM":
                self.cdd = [DDM() for elem in X.T]
            if self.drift_detector == "EDDM":
                self.cdd = [EDDM() for elem in X.T]
            self.init_drift_detection = False
        self.drift_detected = False

        if not self.init_drift_detection:
            for elem, detector in zip(X.T, self.cdd):
                for e in elem:
                    detector.add_element(e)
                    if detector.detected_change():
                        self.drift_detected = True
                        self.n_detections = self.n_detections + 1

        return self.drift_detected


# if name=="__main__":
#     from skmultiflow import
X, y = stream.next_sample(int(w * rho))
stream_clf.partial_fit(X, y, classes=stream.target_values)
while (stream.has_more_samples()):
    X, y = stream.next_sample()
    if D3_win.isEmpty():
        D3_win.addInstance(X, y)
        y_hat = stream_clf.predict(X)
        stream_true = stream_true + check_true(y, y_hat)
        stream_clf.partial_fit(X, y)
        stream_acc.append(stream_true / (i + 1))
        stream_record.append(check_true(y, y_hat))
    else:
        if D3_win.driftCheck():  #detected
            #print("concept drift detected at {}".format(i))
            #retrain the model
            stream_clf.reset()
            stream_clf.partial_fit(D3_win.getCurrentData(),
                                   D3_win.getCurrentLabels(),
                                   classes=stream.target_values)
            #evaluate and update the model
            y_hat = stream_clf.predict(X)
            stream_true = stream_true + check_true(y, y_hat)
            stream_clf.partial_fit(X, y)
            stream_acc.append(stream_true / (i + 1))
            stream_record.append(check_true(y, y_hat))
            #add new sample to the window
            D3_win.addInstance(X, y)
        else:
            #evaluate and update the model
            y_hat = stream_clf.predict(X)
            stream_true = stream_true + check_true(y, y_hat)