# Example (Beispiel) #1
def unsupervised_analysis(df, nu, size, percent):
    """Run drift-aware prequential evaluation of a Hoeffding tree on *df*.

    A one-class SVM feeds a sliding-window drift buffer; when the buffer
    signals drift, both the tree and the SVM are retrained on the window.

    Parameters
    ----------
    df : data accepted by DataStream (presumably a pandas DataFrame —
        TODO confirm against callers).
    nu : float
        ``nu`` parameter of the one-class SVM.
    size : int
        Warm-up / buffer window size in samples.
    percent : float
        Threshold parameter forwarded to ``dataBuffer``.

    Returns
    -------
    (str, list)
        A summary string (parameters, final accuracy, elapsed time) and
        the per-sample 0/1 correctness record.
    """
    stream = DataStream(df)
    stream.prepare_for_use()
    stream_clf = HoeffdingTree()
    stream_acc = []
    stream_record = []
    stream_true = 0
    buffer = dataBuffer(size, stream.n_features, percent)
    clf = svm.OneClassSVM(nu=nu, kernel="rbf", gamma='auto')

    start = time.time()
    # Warm up both models on the first `size` samples.
    X, y = stream.next_sample(size)
    stream_clf.partial_fit(X, y, classes=stream.target_values)
    clf.fit(X)

    def _evaluate_and_update(X, y, i):
        # Prequential step: test on the sample, then train on it.
        # (Was duplicated verbatim in three branches of the original loop.)
        nonlocal stream_true
        y_hat = stream_clf.predict(X)
        stream_true = stream_true + check_true(y, y_hat)
        stream_clf.partial_fit(X, y)
        stream_acc.append(stream_true / (i + 1))
        stream_record.append(check_true(y, y_hat))

    i = 0
    while stream.has_more_samples():
        X, y = stream.next_sample()
        if buffer.isEmpty():
            # While the window is still filling, the sample is buffered
            # before evaluation (order preserved from the original code).
            buffer.addInstance(X, y, clf.predict(X))
            _evaluate_and_update(X, y, i)
        else:
            if buffer.driftCheck():
                # Drift detected: reset the tree and retrain both models
                # on the buffered window before evaluating this sample.
                stream_clf.reset()
                stream_clf.partial_fit(buffer.getCurrentData(),
                                       buffer.getCurrentLabels(),
                                       classes=stream.target_values)
                clf.fit(buffer.getCurrentData())
            _evaluate_and_update(X, y, i)
            buffer.addInstance(X, y, clf.predict(X))
        i = i + 1

    elapsed = format(time.time() - start, '.4f')
    acc = format(stream_acc[-1] * 100, '.4f')
    final_accuracy = "Parameters: {}, {}, {}, Final accuracy: {}, Elapsed time: {}".format(nu,size,percent,acc,elapsed)
    return final_accuracy, stream_record
# Example (Beispiel) #2
class cdht(ClassifierMixin, BaseEstimator):
    """Concept-Drift Hoeffding Tree.

    Wraps a Hoeffding tree and resets it whenever per-feature drift
    detectors (KSWIN, ADWIN, DDM, or EDDM — one per input column)
    report a change in the incoming data.
    """

    def __init__(self, alpha=0.001, drift_detector="KSWIN"):
        # alpha is the significance level forwarded to KSWIN as
        # `self.confidence`; the other detectors ignore it.
        self.classifier = HoeffdingTree()
        self.init_drift_detection = True
        self.drift_detector = drift_detector.upper()
        self.confidence = alpha
        self.n_detections = 0

    def partial_fit(self, X, y, classes=None):
        """Incrementally fit the underlying Hoeffding tree.

        The tree is reset first if drift is detected in ``X``.
        (The original docstring wrongly claimed this called
        MultinomialNB.partial_fit from sklearn.)

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vectors.
        y : array, shape = [n_samples]
            Target values.
        classes : array-like, optional
            Known class labels, forwarded to the tree.

        Returns
        -------
        self
        """
        if self.concept_drift_detection(X, y):
            self.classifier.reset()

        self.classifier.partial_fit(X, y, classes)
        return self

    def predict(self, X):
        """Predict class labels for ``X`` with the current tree."""
        return self.classifier.predict(X)

    def concept_drift_detection(self, X, Y):
        """Feed every feature column into its detector; True iff drift seen.

        Detectors are created lazily on the first call (one per column
        of ``X``). Also increments ``self.n_detections`` per change.
        """
        if self.init_drift_detection:
            if self.drift_detector == "KSWIN":
                self.cdd = [
                    KSWIN(w_size=100, stat_size=30, alpha=self.confidence)
                    for _ in X.T
                ]
            elif self.drift_detector == "ADWIN":
                self.cdd = [ADWIN() for _ in X.T]
            elif self.drift_detector == "DDM":
                self.cdd = [DDM() for _ in X.T]
            elif self.drift_detector == "EDDM":
                self.cdd = [EDDM() for _ in X.T]
            else:
                # Previously an unknown name left self.cdd unset and
                # caused a late AttributeError below; fail fast instead.
                raise ValueError(
                    "Unknown drift detector: {}".format(self.drift_detector))
            self.init_drift_detection = False
        self.drift_detected = False

        # self.cdd is guaranteed to exist here (set above on first call).
        for column, detector in zip(X.T, self.cdd):
            for value in column:
                detector.add_element(value)
                if detector.detected_change():
                    self.drift_detected = True
                    self.n_detections = self.n_detections + 1

        return self.drift_detected


# if name=="__main__":
#     from skmultiflow import
# Example (Beispiel) #3
    def parameter_q_and_t(self):
        """Grid-search MPD3's quantile percent (q) and threshold (t).

        Trains a Hoeffding tree on the first batch, then replays the
        remaining batches, retraining whenever MPD3 flags drift; the
        (q, t) pair with the best mean batch accuracy is returned.

        Returns
        -------
        (float, float)
            The best ``q`` and ``t`` found.
        """
        accuracy_of_combinations = []
        combination = []
        quantile_percent = [0.50, 0.75, 1.0]
        threshold = [0.5, 0.6, 0.7]
        test_X, test_y = get_data_batches(self.X_array, self.y_array)
        bootstrap_count = 100
        for q in quantile_percent:
            for t in threshold:
                # Fresh classifier per combination: HoeffdingTree.fit is
                # incremental, so reusing one instance across the grid
                # would let later (q, t) pairs inherit earlier training
                # and bias the comparison.
                clf = HoeffdingTree()
                train_X = test_X[0]
                train_y = test_y[0].flatten()
                clf = clf.fit(train_X, train_y)
                MPD3_detector = MPD3(bootstrap_count, q, t)
                ensemble = MPD3_detector.ensemble_bootstrap(train_X, train_y)
                batch_accuracy = []
                for i in range(len(test_X) - 1):
                    index = i + 1
                    prediction = clf.predict(test_X[index])
                    batch_accuracy.append(
                        accuracy_score(test_y[index], prediction))
                    mpd_value = MPD3_detector.MPD_score(
                        test_X[index], ensemble)

                    if MPD3_detector.drift_check(mpd_value):
                        # Drift: retrain tree and ensemble on this batch.
                        train_X = test_X[index]
                        train_y = test_y[index].flatten()
                        clf = clf.partial_fit(train_X, train_y)
                        ensemble = MPD3_detector.ensemble_bootstrap(
                            train_X, train_y)

                mean_accuracy = np.average(batch_accuracy)
                accuracy_of_combinations.append(mean_accuracy)
                combination.append([q, t])
        index_of_max_acc = np.argmax(accuracy_of_combinations)
        final_q, final_t = combination[index_of_max_acc]
        return final_q, final_t
# NOTE(review): this example is truncated by the extraction — the
# partial_fit(...) call on the last line is cut off mid-statement, so the
# snippet below is not runnable as-is.

# Prequential D3 drift-detection loop: warm up a Hoeffding tree, then
# test-then-train on each sample, retraining when the D3 window drifts.
stream_clf = HoeffdingTree()
# CLI args: window size w, rho, and AUC threshold; argv[1] is presumably
# the dataset path consumed elsewhere — TODO confirm.
w = int(sys.argv[2])
rho = float(sys.argv[3])
auc = float(sys.argv[4])

# In[ ]:

# D3 drift detector over a sliding window of w samples.
D3_win = D3(w, rho, stream.n_features, auc)
stream_acc = []      # running accuracy after each sample
stream_record = []   # per-sample 0/1 correctness
stream_true = 0      # count of correct predictions so far

i = 0
start = time.time()
# Warm-up: fit on the first w*rho samples.
X, y = stream.next_sample(int(w * rho))
stream_clf.partial_fit(X, y, classes=stream.target_values)
while (stream.has_more_samples()):
    X, y = stream.next_sample()
    if D3_win.isEmpty():
        # Window still filling: buffer, then test-then-train.
        D3_win.addInstance(X, y)
        y_hat = stream_clf.predict(X)
        stream_true = stream_true + check_true(y, y_hat)
        stream_clf.partial_fit(X, y)
        stream_acc.append(stream_true / (i + 1))
        stream_record.append(check_true(y, y_hat))
    else:
        if D3_win.driftCheck():  #detected
            #print("concept drift detected at {}".format(i))
            #retrain the model
            stream_clf.reset()
            stream_clf.partial_fit(D3_win.getCurrentData(),
# Example (Beispiel) #5
# Test-then-train evaluation of a Hoeffding tree (HT) on the RBF stream:
# predict each sample, track running accuracy, then train on it.
positive = 0
cnt = 1
temp_accuracy = []
itr = []
HT_RBF_prediction = []
for i in range(len(RBF_X)):
    tempx = np.array([RBF_X[i]])
    tempy = np.array([RBF_Y[i]])
    # Predict once and reuse the result; the original called
    # HT.predict(tempx) a second time below with identical model state.
    prediction = HT.predict(tempx)
    if tempy == prediction:
        positive += 1
    temp_accuracy.append(positive / cnt)

    # np.int was deprecated in NumPy 1.20 and removed in 1.24; the
    # builtin int is the drop-in replacement.
    HT_RBF_prediction.append(int(prediction))

    HT.partial_fit(tempx, tempy)  # Fitting(training) the model
    cnt += 1
    itr.append(cnt)

ACC_HT_RBF = positive / len(RBF_X)
print(ACC_HT_RBF)
plt.plot(itr, temp_accuracy)
plt.title("Temporal Accuracy of RBF HT Online Classifier")


# In[234]:


### Hoeffding Tree Online Classifier for RBF 10 ###