def unsupervised_analysis(df, nu, size, percent):
    """Evaluate a Hoeffding tree on a stream, retraining it when the buffer reports drift.

    The first ``size`` samples are used to warm up both the stream classifier
    and a one-class SVM.  Every following sample is processed test-then-train:
    the classifier predicts it, the prediction is scored with ``check_true``,
    and only then is the classifier updated with the true label.  Each sample
    is also stored in a ``dataBuffer`` together with the one-class SVM's
    prediction for it; whenever ``buffer.driftCheck()`` is true the classifier
    is reset and retrained on the buffer's current data, and the one-class SVM
    is refitted on the same data.

    Parameters
    ----------
    df
        Data handed unchanged to ``DataStream``.
    nu
        ``nu`` parameter of the ``svm.OneClassSVM``.
    size
        Number of warm-up samples; also passed to ``dataBuffer`` as its first
        argument.
    percent
        Passed to ``dataBuffer`` as its third argument.

    Returns
    -------
    tuple
        ``(final_accuracy, stream_record)`` where ``final_accuracy`` is a
        summary string with the parameters, the final running accuracy in
        percent and the elapsed time, and ``stream_record`` is the list of
        per-sample ``check_true`` results.  If the stream holds no samples
        beyond the warm-up batch, the accuracy is reported as ``nan``.
    """
    stream = DataStream(df)
    stream.prepare_for_use()

    stream_clf = HoeffdingTree()
    stream_acc = []
    stream_record = []
    stream_true = 0

    buffer = dataBuffer(size, stream.n_features, percent)
    clf = svm.OneClassSVM(nu=nu, kernel="rbf", gamma='auto')

    # The elapsed time reported below is measured from here, so this must be
    # live code and not part of a comment.
    start = time.time()

    # Warm-up: fit both models on the first `size` samples.
    X, y = stream.next_sample(size)
    stream_clf.partial_fit(X, y, classes=stream.target_values)
    clf.fit(X)

    i = 0
    while stream.has_more_samples():
        X, y = stream.next_sample()

        # Drift is only checked once the buffer holds data.
        if not buffer.isEmpty() and buffer.driftCheck():
            # Retrain the stream classifier from scratch on the buffered data
            # and refresh the one-class SVM on the same data.
            stream_clf.reset()
            stream_clf.partial_fit(
                buffer.getCurrentData(),
                buffer.getCurrentLabels(),
                classes=stream.target_values,
            )
            clf.fit(buffer.getCurrentData())

        # Test-then-train: score the prediction before learning the label.
        # NOTE(review): check_true is called once and its result reused;
        # this presumes it is a pure comparison - confirm.
        y_hat = stream_clf.predict(X)
        correct = check_true(y, y_hat)
        stream_true = stream_true + correct
        stream_clf.partial_fit(X, y)
        stream_acc.append(stream_true / (i + 1))
        stream_record.append(correct)

        # Store the sample with the one-class SVM's prediction for it.
        buffer.addInstance(X, y, clf.predict(X))
        i = i + 1

    elapsed = format(time.time() - start, '.4f')
    # With no samples after the warm-up batch there is no accuracy to report.
    last_accuracy = stream_acc[-1] if stream_acc else float("nan")
    acc = format(last_accuracy * 100, '.4f')
    final_accuracy = "Parameters: {}, {}, {}, Final accuracy: {}, Elapsed time: {}".format(nu,size,percent,acc,elapsed)
    return final_accuracy, stream_record
class cdht(ClassifierMixin, BaseEstimator):
    """Hoeffding tree that is reset when a per-feature drift detector fires.

    One drift detector is created per feature column of the first batch seen.
    On every ``partial_fit`` call the new feature values are fed to the
    detectors; if any of them reports a change, the wrapped ``HoeffdingTree``
    is reset before it is trained on the batch.

    Parameters
    ----------
    alpha : float, default=0.001
        Passed as ``alpha`` to each ``KSWIN`` detector.  It is not used by the
        other detector types.
    drift_detector : str, default="KSWIN"
        Detector type, case-insensitive: "KSWIN", "ADWIN", "DDM" or "EDDM".
    """

    def __init__(self, alpha=0.001, drift_detector="KSWIN"):
        self.classifier = HoeffdingTree()
        self.init_drift_detection = True
        self.drift_detector = drift_detector.upper()
        # scikit-learn's get_params() looks constructor parameters up by
        # attribute name, so keep `alpha` alongside the `confidence` alias.
        self.alpha = alpha
        self.confidence = alpha
        self.n_detections = 0
        self.drift_detected = False

    def partial_fit(self, X, y, classes=None):
        """Update the Hoeffding tree, resetting it first if drift is detected.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.
        y : array, shape = [n_samples]
            Target values.
        classes : optional
            Forwarded unchanged to the wrapped classifier's ``partial_fit``.

        Returns
        -------
        self
        """
        if self.concept_drift_detection(X, y):
            self.classifier.reset()
        self.classifier.partial_fit(X, y, classes)
        return self

    def predict(self, X):
        """Return the wrapped classifier's predictions for ``X``."""
        return self.classifier.predict(X)

    def concept_drift_detection(self, X, Y):
        """Feed ``X`` to the per-feature detectors and report whether any fired.

        The detectors are created on the first call, one per column of ``X``.
        ``Y`` is accepted for interface compatibility but is not used.

        Returns
        -------
        bool
            True if at least one detector reported a change while consuming
            this batch.  ``n_detections`` is incremented once per reported
            change.

        Raises
        ------
        ValueError
            If ``drift_detector`` is not one of the supported names.
        """
        if self.init_drift_detection:
            self.cdd = [self._new_detector() for _ in X.T]
            self.init_drift_detection = False

        # The flag describes the current batch only, so reset it each call.
        self.drift_detected = False
        for column, detector in zip(X.T, self.cdd):
            for value in column:
                detector.add_element(value)
                if detector.detected_change():
                    self.drift_detected = True
                    self.n_detections = self.n_detections + 1
        return self.drift_detected

    def _new_detector(self):
        """Build one detector of the configured type."""
        if self.drift_detector == "KSWIN":
            return KSWIN(w_size=100, stat_size=30, alpha=self.confidence)
        if self.drift_detector == "ADWIN":
            return ADWIN()
        if self.drift_detector == "DDM":
            return DDM()
        if self.drift_detector == "EDDM":
            return EDDM()
        raise ValueError(
            "Unknown drift_detector {!r}; expected one of "
            "'KSWIN', 'ADWIN', 'DDM', 'EDDM'".format(self.drift_detector)
        )


# if name=="__main__":
# from skmultiflow import
X, y = stream.next_sample(int(w * rho)) stream_clf.partial_fit(X, y, classes=stream.target_values) while (stream.has_more_samples()): X, y = stream.next_sample() if D3_win.isEmpty(): D3_win.addInstance(X, y) y_hat = stream_clf.predict(X) stream_true = stream_true + check_true(y, y_hat) stream_clf.partial_fit(X, y) stream_acc.append(stream_true / (i + 1)) stream_record.append(check_true(y, y_hat)) else: if D3_win.driftCheck(): #detected #print("concept drift detected at {}".format(i)) #retrain the model stream_clf.reset() stream_clf.partial_fit(D3_win.getCurrentData(), D3_win.getCurrentLabels(), classes=stream.target_values) #evaluate and update the model y_hat = stream_clf.predict(X) stream_true = stream_true + check_true(y, y_hat) stream_clf.partial_fit(X, y) stream_acc.append(stream_true / (i + 1)) stream_record.append(check_true(y, y_hat)) #add new sample to the window D3_win.addInstance(X, y) else: #evaluate and update the model y_hat = stream_clf.predict(X) stream_true = stream_true + check_true(y, y_hat)