def unsupervised_analysis(df, nu, size, percent):
    """Evaluate a Hoeffding tree test-then-train on ``df`` with drift handling.

    A ``DataStream`` is built from ``df``. The first ``size`` samples warm up
    both the Hoeffding tree and a one-class SVM. Every following sample is
    first predicted, then used to update the tree, and is stored in a
    ``dataBuffer`` together with the one-class SVM prediction for it. Whenever
    ``buffer.driftCheck()`` reports drift, the tree is reset and retrained on
    the buffer's current data and the one-class SVM is refitted on it.

    Parameters
    ----------
    df
        Data handed to ``DataStream``.
    nu
        ``nu`` parameter of the ``OneClassSVM``.
    size
        Number of warm-up samples; also the first argument of ``dataBuffer``.
    percent
        Third argument of ``dataBuffer``.

    Returns
    -------
    tuple
        ``(summary, stream_record)`` where ``summary`` is a string with the
        parameters, the final running accuracy (in percent) and the elapsed
        time, and ``stream_record`` holds one ``check_true`` result per
        evaluated sample. The accuracy is reported as ``nan`` when the stream
        holds no samples beyond the warm-up.
    """
    stream = DataStream(df)
    stream.prepare_for_use()
    stream_clf = HoeffdingTree()
    stream_acc = []
    stream_record = []
    stream_true = 0
    buffer = dataBuffer(size, stream.n_features, percent)
    clf = svm.OneClassSVM(nu=nu, kernel="rbf", gamma='auto')

    # Timed locally so the elapsed time never depends on a global ``start``.
    start = time.time()

    # Warm-up: train both models on the first ``size`` samples.
    X, y = stream.next_sample(size)
    stream_clf.partial_fit(X, y, classes=stream.target_values)
    clf.fit(X)

    i = 0
    while stream.has_more_samples():
        X, y = stream.next_sample()

        window_was_empty = buffer.isEmpty()
        if window_was_empty:
            # An empty window cannot be checked for drift; just start filling.
            buffer.addInstance(X, y, clf.predict(X))
        elif buffer.driftCheck():
            # Drift detected: retrain the tree from the current window and
            # refit the one-class SVM on the same data.
            stream_clf.reset()
            stream_clf.partial_fit(buffer.getCurrentData(),
                                   buffer.getCurrentLabels(),
                                   classes=stream.target_values)
            clf.fit(buffer.getCurrentData())

        # Test-then-train: evaluate on the sample before learning from it.
        y_hat = stream_clf.predict(X)
        hit = check_true(y, y_hat)
        stream_true = stream_true + hit
        stream_clf.partial_fit(X, y)
        stream_acc.append(stream_true / (i + 1))
        stream_record.append(hit)

        if not window_was_empty:
            # Add the new sample to the window after it has been evaluated.
            buffer.addInstance(X, y, clf.predict(X))

        i = i + 1

    elapsed = format(time.time() - start, '.4f')
    # No evaluated samples means there is no accuracy to report.
    last_accuracy = stream_acc[-1] if stream_acc else float("nan")
    acc = format(last_accuracy * 100, '.4f')
    final_accuracy = "Parameters: {}, {}, {}, Final accuracy: {}, Elapsed time: {}".format(nu,size,percent,acc,elapsed)
    return final_accuracy, stream_record
class cdht(ClassifierMixin, BaseEstimator):
    """Hoeffding tree combined with per-feature concept drift detection.

    One drift detector is kept per feature column of the first batch seen.
    Every value passed to ``partial_fit`` is fed to the detector of its
    column, and the tree is reset before training whenever any detector
    reports a change.

    Parameters
    ----------
    alpha : float, default 0.001
        Stored as ``confidence`` and passed as ``alpha`` to KSWIN detectors.
    drift_detector : str, default "KSWIN"
        Detector name, matched case-insensitively against "KSWIN", "ADWIN",
        "DDM" and "EDDM".
    """

    def __init__(self, alpha=0.001, drift_detector="KSWIN"):
        self.classifier = HoeffdingTree()
        self.init_drift_detection = True
        self.drift_detector = drift_detector.upper()
        self.confidence = alpha
        self.n_detections = 0

    def partial_fit(self, X, y, classes=None):
        """Update the drift detectors with ``X`` and train the Hoeffding tree.

        The tree is reset first when drift is detected in this batch.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.
        y : array, shape = [n_samples]
            Target values.
        classes : optional
            Forwarded to the tree's ``partial_fit``.

        Returns
        -------
        self
        """
        if self.concept_drift_detection(X, y):
            self.classifier.reset()
        self.classifier.partial_fit(X, y, classes)
        return self

    def predict(self, X):
        """Return the wrapped Hoeffding tree's predictions for ``X``."""
        return self.classifier.predict(X)

    def concept_drift_detection(self, X, Y):
        """Feed ``X`` to the per-feature detectors and report drift.

        On the first call one detector per column of ``X`` is created.
        ``n_detections`` is incremented for every value at which a detector
        reports a change. ``Y`` is accepted but not used.

        Returns
        -------
        bool
            True when at least one detector reported a change in this call.

        Raises
        ------
        ValueError
            If ``drift_detector`` is not one of the supported names.
        """
        if self.init_drift_detection:
            # Lambdas keep the detector names lazily resolved, so only the
            # selected detector class has to be importable.
            factories = {
                "KSWIN": lambda: KSWIN(w_size=100, stat_size=30,
                                       alpha=self.confidence),
                "ADWIN": lambda: ADWIN(),
                "DDM": lambda: DDM(),
                "EDDM": lambda: EDDM(),
            }
            make_detector = factories.get(self.drift_detector)
            if make_detector is None:
                raise ValueError(
                    "Unknown drift detector {!r}; expected one of {}".format(
                        self.drift_detector, sorted(factories)))
            self.cdd = [make_detector() for _ in X.T]
            self.init_drift_detection = False

        # Reset on every call: a past detection must not force a tree reset
        # on each later batch.
        self.drift_detected = False
        for column, detector in zip(X.T, self.cdd):
            for value in column:
                detector.add_element(value)
                if detector.detected_change():
                    self.drift_detected = True
                    self.n_detections = self.n_detections + 1
        return self.drift_detected

# if name=="__main__":
#     from skmultiflow import
def parameter_q_and_t(self):
    """Grid-search the MPD3 quantile and threshold on the stored batches.

    ``self.X_array`` and ``self.y_array`` are split with ``get_data_batches``.
    For every (quantile, threshold) pair a fresh Hoeffding tree is fitted on
    the first batch and then scored on each following batch. When the MPD3
    detector flags drift on a batch, the tree is updated with that batch and
    the bootstrap ensemble is rebuilt from it.

    Returns
    -------
    tuple
        ``(final_q, final_t)``: the pair with the highest mean batch
        accuracy; the first such pair wins on ties.
    """
    quantile_percent = [0.50, 0.75, 1.0]
    threshold = [0.5, 0.6, 0.7]
    bootstrap_count = 100

    test_X, test_y = get_data_batches(self.X_array, self.y_array)

    accuracy_of_combinations = []
    combination = []
    for q in quantile_percent:
        for t in threshold:
            # A new tree per combination: HoeffdingTree learns incrementally,
            # so reusing one instance would let earlier combinations' training
            # leak into the scores of later ones.
            clf = HoeffdingTree()
            Train_X = test_X[0]
            Train_y = test_y[0].flatten()
            clf = clf.fit(Train_X, Train_y)

            MPD3_detector = MPD3(bootstrap_count, q, t)
            ensemble = MPD3_detector.ensemble_bootstrap(Train_X, Train_y)

            batch_accuracy = []
            # Batch 0 was used for training; score every later batch.
            for index in range(1, len(test_X)):
                prediction = clf.predict(test_X[index])
                batch_accuracy.append(
                    accuracy_score(test_y[index], prediction))

                mpd_value = MPD3_detector.MPD_score(test_X[index], ensemble)
                if MPD3_detector.drift_check(mpd_value):
                    # Drift: adapt the tree and rebuild the ensemble from
                    # the batch that triggered it.
                    Train_X = test_X[index]
                    Train_y = test_y[index].flatten()
                    clf = clf.partial_fit(Train_X, Train_y)
                    ensemble = MPD3_detector.ensemble_bootstrap(
                        Train_X, Train_y)

            mean_accuracy = np.average(batch_accuracy)
            accuracy_of_combinations.append(mean_accuracy)
            combination.append([q, t])

    index_of_max_acc = np.argmax(accuracy_of_combinations)
    final_q, final_t = combination[index_of_max_acc]
    return final_q, final_t
# In[ ]: D3_win = D3(w, rho, stream.n_features, auc) stream_acc = [] stream_record = [] stream_true = 0 i = 0 start = time.time() X, y = stream.next_sample(int(w * rho)) stream_clf.partial_fit(X, y, classes=stream.target_values) while (stream.has_more_samples()): X, y = stream.next_sample() if D3_win.isEmpty(): D3_win.addInstance(X, y) y_hat = stream_clf.predict(X) stream_true = stream_true + check_true(y, y_hat) stream_clf.partial_fit(X, y) stream_acc.append(stream_true / (i + 1)) stream_record.append(check_true(y, y_hat)) else: if D3_win.driftCheck(): #detected #print("concept drift detected at {}".format(i)) #retrain the model stream_clf.reset() stream_clf.partial_fit(D3_win.getCurrentData(), D3_win.getCurrentLabels(), classes=stream.target_values) #evaluate and update the model y_hat = stream_clf.predict(X) stream_true = stream_true + check_true(y, y_hat)
### HT Online for RBF###

# In[233]:

# Test-then-train evaluation of a Hoeffding tree on the RBF data: each sample
# is predicted first, scored, and only then used to update the tree.
HT = HoeffdingTree()
positive = 0             # number of correct predictions so far
cnt = 1                  # 1-based count of the sample being processed
temp_accuracy = []       # running accuracy after each sample
itr = []                 # x-axis values for the accuracy plot
HT_RBF_prediction = []   # predicted label per sample, as a plain int

for i in range(len(RBF_X)):
    tempx = np.array([RBF_X[i]])
    tempy = np.array([RBF_Y[i]])

    prediction = HT.predict(tempx)
    if tempy == prediction:
        positive += 1
    temp_accuracy.append(positive/cnt)
    # np.int was removed in NumPy 1.24; convert the single predicted value
    # with the builtin int and reuse the prediction made above.
    HT_RBF_prediction.append(int(np.asarray(prediction).item()))

    HT.partial_fit(tempx, tempy)  # Fitting (training) the model
    cnt += 1
    # NOTE(review): itr stores the counter after the increment, so the x-axis
    # runs from 2 to len(RBF_X) + 1; confirm this offset is intended.
    itr.append(cnt)

ACC_HT_RBF = positive/len(RBF_X)
print(ACC_HT_RBF)
plt.plot(itr, temp_accuracy)
plt.title("Temporal Accuracy of RBF HT Online Classifier")