Beispiel #1
0
def worker(d_indx, key, data):
    """Cross-validate the kNN-based ensembles on one dataset and save scores.

    Every classifier is cloned before each fit, so no fitted state is shared
    between folds and the prototypes in ``clfs`` are never mutated.

    Args:
        d_indx: Index of the dataset; not used inside this function.
        key: Dataset name, used in progress messages and in the output
            file name.
        data: Pair ``(X, y)``; both are indexed with the index arrays
            yielded by ``RepeatedStratifiedKFold.split``.

    Side effects:
        Prints progress messages and stores the score array with
        ``np.save`` under ``results/knn/<key>_knn``. Relies on the
        module-level ``metrics`` mapping (name -> callable taking
        ``(y_true, y_pred)``).
    """
    print("Dataset: %s start" % key)
    X, y = data

    # Alternative base learners (swap in by hand):
    #   GaussianNB()
    #   DecisionTreeClassifier(random_state=1410)
    #   MLPClassifier(random_state=1410)
    #   SVC(kernel='rbf', probability=False, random_state=1410)
    b = KNeighborsClassifier(weights='distance')
    n_estimators = 50
    acc_prob = False

    base_clf = StratifiedBagging(base_estimator=b,
                                 ensemble_size=n_estimators,
                                 acc_prob=True,
                                 random_state=1410)

    clfs = {
        "MV-kNN":
        StratifiedBagging(base_estimator=b,
                          ensemble_size=n_estimators,
                          acc_prob=False,
                          random_state=1410),
        "ACC-kNN":
        StratifiedBagging(base_estimator=b,
                          ensemble_size=n_estimators,
                          acc_prob=True,
                          random_state=1410),
        "PRUNE-kNN":
        CMOE(base_estimator=base_clf,
             random_state=1410,
             diversity="kw",
             hard_voting=acc_prob)
    }

    n_splits = 5
    n_repeats = 1
    rskf = RepeatedStratifiedKFold(n_splits=n_splits,
                                   n_repeats=n_repeats,
                                   random_state=1410)
    # Rows 0 and 1 hold the two bagging variants; rows 2.. hold one entry per
    # element of the CMOE prediction output. The fixed size of 32 is kept so
    # the saved array shape stays the same for downstream readers.
    scores = np.zeros((32, n_splits * n_repeats, len(metrics)))

    for fold_id, (train, test) in enumerate(rskf.split(X, y)):
        print("FOLD %i: " % fold_id)
        for clf_id, prototype in enumerate(clfs.values()):
            # Fit a fresh copy: the original code cloned the estimator and
            # then discarded the clone, refitting the shared instance.
            clf = clone(prototype)
            clf.fit(X[train], y[train])
            y_pred = clf.predict(X[test])
            if clf_id < 2:
                for m_indx, metric in enumerate(metrics.values()):
                    scores[clf_id, fold_id, m_indx] = metric(y[test], y_pred)
            else:
                # The third classifier's predict output is iterated and each
                # element is scored as a separate prediction vector.
                for e_n, ensemble_pred in enumerate(y_pred):
                    for m_indx, metric in enumerate(metrics.values()):
                        scores[clf_id + e_n, fold_id,
                               m_indx] = metric(y[test], ensemble_pred)
    np.save("results/knn/%s_knn" % key, scores)
    print("Dataset: %s end" % key)
def worker(d_indx, key, data):
    """Cross-validate GaussianNB bagging with several oversamplers and save scores.

    Every classifier is cloned before each fit, so no fitted state is shared
    between folds and the prototypes in ``clfs`` are never mutated.

    Args:
        d_indx: Index of the dataset; not used inside this function.
        key: Dataset name, used in progress messages and in the output
            file name.
        data: Pair ``(X, y)``; both are indexed with the index arrays
            yielded by ``RepeatedStratifiedKFold.split``.

    Side effects:
        Prints progress messages and stores the score array (classifier x
        fold x metric) with ``np.save`` under
        ``results/gnb/<key>_gnb_preproc``. Relies on the module-level
        ``metrics`` mapping (name -> callable taking ``(y_true, y_pred)``).
    """
    print("Dataset: %s start" % key)
    X, y = data

    # Alternative base learners (swap in by hand):
    #   DecisionTreeClassifier(random_state=1410)
    #   KNeighborsClassifier(weights='distance')
    #   MLPClassifier(random_state=1410)
    #   SVC(kernel='rbf', probability=False, random_state=1410)
    b = GaussianNB()
    n_estimators = 50

    # NOTE(review): the keys say "MV" but every entry uses acc_prob=True --
    # confirm the labels match the intended combination rule.
    clfs = {
        "MV-GNB-ROS":
        StratifiedBagging(base_estimator=b,
                          ensemble_size=n_estimators,
                          acc_prob=True,
                          random_state=1410,
                          oversampled="ROS"),
        "MV-GNB-SMOTE":
        StratifiedBagging(base_estimator=b,
                          ensemble_size=n_estimators,
                          acc_prob=True,
                          random_state=1410,
                          oversampled="SMOTE"),
        "MV-GNB-SVMSMOTE":
        StratifiedBagging(base_estimator=b,
                          ensemble_size=n_estimators,
                          acc_prob=True,
                          random_state=1410,
                          oversampled="SVMSMOTE"),
        "MV-GNB-B2SMOTE":
        StratifiedBagging(base_estimator=b,
                          ensemble_size=n_estimators,
                          acc_prob=True,
                          random_state=1410,
                          oversampled="B2SMOTE"),
    }

    n_splits = 5
    n_repeats = 1
    rskf = RepeatedStratifiedKFold(n_splits=n_splits,
                                   n_repeats=n_repeats,
                                   random_state=1410)
    scores = np.zeros((len(clfs), n_splits * n_repeats, len(metrics)))

    for fold_id, (train, test) in enumerate(rskf.split(X, y)):
        print("FOLD %i: " % fold_id)
        for clf_id, prototype in enumerate(clfs.values()):
            # Fit a fresh copy: the original code cloned the estimator and
            # then discarded the clone, refitting the shared instance.
            clf = clone(prototype)
            clf.fit(X[train], y[train])
            y_pred = clf.predict(X[test])
            for m_indx, metric in enumerate(metrics.values()):
                scores[clf_id, fold_id, m_indx] = metric(y[test], y_pred)
    np.save("results/gnb/%s_gnb_preproc" % key, scores)
    print("Dataset: %s end" % key)
from skmultiflow.trees import HoeffdingTree

# The random state must be given as the single command-line argument.
if len(sys.argv) == 2:
    random_state = int(sys.argv[1])
else:
    print("PODAJ RS")
    exit()

print(random_state)

# Pick the streams to process and the methods to compare.
streams = h.toystreams(random_state)

print(len(streams))

# One SEA without `des`, then one per `des` setting; every ensemble gets its
# own GaussianNB-based StratifiedBagging instance.
sea = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB(),
                                           random_state=42))
knorau1, knorau2, knorae1, knorae2 = (
    SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB(),
                                         random_state=42),
        des=des_name)
    for des_name in ("KNORAU1", "KNORAU2", "KNORAE1", "KNORAE2")
)

clfs = (sea, knorau1, knorau2, knorae1, knorae2)
from sklearn.svm import SVC

# The random state must be given as the single command-line argument.
if len(sys.argv) == 2:
    random_state = int(sys.argv[1])
else:
    print("PODAJ RS")
    exit()

print(random_state)

# Pick the streams to process and the methods to compare.
streams = h.toystreams(random_state)

print(len(streams))

# For each `des` setting build one SEA per sampler name; the same name is
# passed to both the bagging base estimator and the SEA wrapper.
none_knorau1, rus_knorau1, cnn_knorau1 = (
    SEA(base_estimator=StratifiedBagging(
            base_estimator=SVC(probability=True, random_state=42),
            random_state=42,
            oversampler=sampler),
        oversampled=sampler,
        des="KNORAU1")
    for sampler in ("None", "RUS", "CNN")
)
none_knorae1, rus_knorae1, cnn_knorae1 = (
    SEA(base_estimator=StratifiedBagging(
            base_estimator=SVC(probability=True, random_state=42),
            random_state=42,
            oversampler=sampler),
        oversampled=sampler,
        des="KNORAE1")
    for sampler in ("None", "RUS", "CNN")
)

clfs = (
    none_knorau1,
    rus_knorau1,
    cnn_knorau1,
    none_knorae1,
    rus_knorae1,
    cnn_knorae1,
)

# Define worker
def worker(i, stream_n):
Beispiel #5
0
    exit()
else:
    random_state = int(sys.argv[1])

print(random_state)

# Pick the streams to process and the methods to compare.
streams = h.streams(random_state)

print(len(streams))

# Three online ensembles, each with 20 GaussianNB estimators.
ob, oob, uob = (
    ensemble_cls(n_estimators=20, base_estimator=GaussianNB())
    for ensemble_cls in (OnlineBagging, OOB, UOB)
)

# SEA variants: the sampler name goes to both the bagging base estimator and
# the SEA wrapper.
ros_knorau2, cnn_knorau2, ros_knorae2 = (
    SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB(),
                                         random_state=42,
                                         oversampler=sampler),
        oversampled=sampler,
        des=des_name)
    for sampler, des_name in (
        ("ROS", "KNORAU2"),
        ("CNN", "KNORAU2"),
        ("ROS", "KNORAE2"),
    )
)
cnn_knorae2 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB(),
                                                   random_state=42,
# The random state must be given as the single command-line argument.
if len(sys.argv) == 2:
    random_state = int(sys.argv[1])
else:
    print("PODAJ RS")
    exit()

print(random_state)

# Pick the streams to process and the methods to compare.
streams = h.toystreams(random_state)

print(len(streams))

# One SEA without `des`, then one per `des` setting; each wraps its own
# StratifiedBagging over a Hellinger-criterion HoeffdingTree.
sea = SEA(base_estimator=StratifiedBagging(
    base_estimator=HoeffdingTree(split_criterion='hellinger'),
    random_state=42))
knorau1, knorau2, knorae1 = (
    SEA(base_estimator=StratifiedBagging(
            base_estimator=HoeffdingTree(split_criterion='hellinger'),
            random_state=42),
        des=des_name)
    for des_name in ("KNORAU1", "KNORAU2", "KNORAE1")
)
knorae2 = SEA(base_estimator=StratifiedBagging(
    base_estimator=HoeffdingTree(split_criterion='hellinger'),
# The random state must be given as the single command-line argument.
if len(sys.argv) == 2:
    random_state = int(sys.argv[1])
else:
    print("PODAJ RS")
    exit()

print(random_state)

# Pick the streams to process and the methods to compare.
streams = h.toystreams(random_state)

print(len(streams))

# One KNORAU1 SEA per sampler name; the name is passed to both the bagging
# base estimator and the SEA wrapper.
none_knorau1, ros_knorau1, b2_knorau1 = (
    SEA(base_estimator=StratifiedBagging(
            base_estimator=KNeighborsClassifier(weights='distance'),
            random_state=42,
            oversampler=sampler),
        oversampled=sampler,
        des="KNORAU1")
    for sampler in ("None", "ROS", "B2")
)
)
import sys
from sklearn.base import clone
from sklearn.tree import DecisionTreeClassifier
from skmultiflow.trees import HoeffdingTree

# Select streams and methods
streams = h.realstreams()
print(len(streams))

ob = OnlineBagging(n_estimators=20, base_estimator=GaussianNB())
oob = OOB(n_estimators=20, base_estimator=GaussianNB())
uob = UOB(n_estimators=20, base_estimator=GaussianNB())
# SEA without oversampling is disabled. Both lines of the statement are
# commented out; previously only the first was, which left a stray
# closing-parenthesis line and a syntax error.
# sea = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB(),
#                                            random_state=42))
ros_knorau2 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB(),
                                                   random_state=42,
                                                   oversampler="ROS"),
                  oversampled="ROS",
                  des="KNORAU2")
cnn_knorau2 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB(),
                                                   random_state=42,
                                                   oversampler="CNN"),
                  oversampled="CNN",
                  des="KNORAU2")
ros_knorae2 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB(),
                                                   random_state=42,
                                                   oversampler="ROS"),
                  oversampled="ROS",
                  des="KNORAE2")
cnn_knorae2 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB(),
                                                   random_state=42,
                                                   oversampler="CNN"),
                  oversampled="CNN",
                  des="KNORAE2")

clfs = (ob, oob, uob, ros_knorau2, cnn_knorau2, ros_knorae2, cnn_knorae2)

# Define worker
def worker(i, stream_n):
    """Look up one stream and prepare fresh classifier copies for it.

    NOTE(review): the body appears to be cut off in this excerpt; only the
    setup steps below are visible.

    Args:
        i: Position used to pick the stream's key from ``streams.keys()``.
        stream_n: Key/index used to look the stream up in ``streams``.
    """
    stream = streams[stream_n]
    key = list(streams.keys())[i]

    # Clone every configured classifier so this call works on its own copies
    # instead of the shared module-level instances.
    cclfs = [clone(clf) for clf in clfs]
Beispiel #9
0
from csm import SEA, StratifiedBagging, REA, LearnppCDS, LearnppNIE, OUSE, KMeanClustering, rea, TestThenTrain
from sklearn.naive_bayes import GaussianNB
from strlearn.streams import StreamGenerator
# from strlearn.evaluators import TestThenTrain
from strlearn.metrics import (balanced_accuracy_score, f1_score,
                              geometric_mean_score_1, precision, recall,
                              specificity)
import matplotlib.pyplot as plt
from scipy.ndimage.filters import gaussian_filter1d
import numpy as np
from sklearn.metrics import roc_auc_score

# Build the five reference ensembles in a fixed order. Each one wraps its own
# GaussianNB-based StratifiedBagging and uses five classifiers.
rea, cds, nie, ouse, kmc = (
    ensemble_cls(base_classifier=StratifiedBagging(base_estimator=GaussianNB(),
                                                   random_state=42),
                 number_of_classifiers=5)
    for ensemble_cls in (REA, LearnppCDS, LearnppNIE, OUSE, KMeanClustering)
)

sea = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB(),
                                           random_state=42),
Beispiel #10
0
from skmultiflow.trees import HoeffdingTree

# The random state must be given as the single command-line argument.
if len(sys.argv) == 2:
    random_state = int(sys.argv[1])
else:
    print("PODAJ RS")
    exit()

print(random_state)

# Pick the streams to process and the methods to compare.
streams = h.toystreams(random_state)

print(len(streams))

# One KNORAU2 SEA per sampler name; the name is passed to both the bagging
# base estimator and the SEA wrapper.
none_knorau2, ros_knorau2, b2_knorau2 = (
    SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB(),
                                         random_state=42,
                                         oversampler=sampler),
        oversampled=sampler,
        des="KNORAU2")
    for sampler in ("None", "ROS", "B2")
)
none_knorae2 = SEA(base_estimator=StratifiedBagging(
    base_estimator=GaussianNB(), random_state=42, oversampler="None"),
                   oversampled="None",
    recall,
    specificity
)
import sys
from sklearn.base import clone
from sklearn.tree import DecisionTreeClassifier
from skmultiflow.trees import HoeffdingTree
import time
import matplotlib.pyplot as plt

# Select streams and methods
streams = h.timestream(100)

print(len(streams))

# Sweep over the ensemble size with an otherwise identical ROS / KNORAU2
# configuration. The size-5 variant previously used "CNN" despite its `ros_`
# name, so it differed from its siblings in more than n_estimators; building
# all variants from one expression keeps them consistent.
(ros_knorau2_3,
 ros_knorau2_5,
 ros_knorau2_10,
 ros_knorau2_15,
 ros_knorau2_30) = (
    SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB(),
                                         random_state=42,
                                         oversampler="ROS"),
        oversampled="ROS",
        des="KNORAU2",
        n_estimators=ensemble_size)
    for ensemble_size in (3, 5, 10, 15, 30)
)

# cnn_knorau2 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB(
# ), random_state=42, oversampler="CNN"), oversampled="CNN", des="KNORAU2")
# ros_knorae2 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB(
# ), random_state=42, oversampler="ROS"), oversampled="ROS", des="KNORAE2")
# cnn_knorae2 = SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB(
# ), random_state=42, oversampler="CNN"), oversampled="CNN", des="KNORAE2")
Beispiel #12
0
from sklearn.neighbors import KNeighborsClassifier

# The random state must be given as the single command-line argument.
if len(sys.argv) == 2:
    random_state = int(sys.argv[1])
else:
    print("PODAJ RS")
    exit()

print(random_state)

# Pick the streams to process and the methods to compare.
streams = h.toystreams(random_state)

print(len(streams))

# One SEA without `des`, then one per `des` setting; each wraps its own
# StratifiedBagging over a distance-weighted kNN.
sea = SEA(base_estimator=StratifiedBagging(
    base_estimator=KNeighborsClassifier(weights='distance'),
    random_state=42))
knorau1, knorau2, knorae1, knorae2 = (
    SEA(base_estimator=StratifiedBagging(
            base_estimator=KNeighborsClassifier(weights='distance'),
            random_state=42),
        des=des_name)
    for des_name in ("KNORAU1", "KNORAU2", "KNORAE1", "KNORAE2")
)
clfs = (sea, knorau1, knorau2, knorae1, knorae2)

# Define worker
from sklearn.svm import SVC

# The random state must be given as the single command-line argument.
if len(sys.argv) == 2:
    random_state = int(sys.argv[1])
else:
    print("PODAJ RS")
    exit()

print(random_state)

# Pick the streams to process and the methods to compare.
streams = h.toystreams(random_state)

print(len(streams))

# One SEA without `des`, then one per `des` setting; each wraps its own
# StratifiedBagging over a probability-enabled SVC.
sea = SEA(base_estimator=StratifiedBagging(
    base_estimator=SVC(probability=True, random_state=42),
    random_state=42))
knorau1, knorau2, knorae1 = (
    SEA(base_estimator=StratifiedBagging(
            base_estimator=SVC(probability=True, random_state=42),
            random_state=42),
        des=des_name)
    for des_name in ("KNORAU1", "KNORAU2", "KNORAE1")
)
knorae2 = SEA(base_estimator=StratifiedBagging(base_estimator=SVC(
    probability=True, random_state=42),
                                               random_state=42),
# The random state must be given as the single command-line argument.
if len(sys.argv) == 2:
    random_state = int(sys.argv[1])
else:
    print("PODAJ RS")
    exit()

print(random_state)

# Pick the streams to process and the methods to compare.
streams = h.toystreams(random_state)

print(len(streams))

# One KNORAU1 SEA per sampler name; the name is passed to both the bagging
# base estimator and the SEA wrapper.
none_knorau1, ros_knorau1, b2_knorau1 = (
    SEA(base_estimator=StratifiedBagging(
            base_estimator=SVC(probability=True, random_state=42),
            random_state=42,
            oversampler=sampler),
        oversampled=sampler,
        des="KNORAU1")
    for sampler in ("None", "ROS", "B2")
)
    geometric_mean_score_1,
    precision,
    recall,
    specificity
)
import sys
from sklearn.base import clone
from sklearn.tree import DecisionTreeClassifier
from skmultiflow.trees import HoeffdingTree

# Pick the streams to process and the methods to compare.
streams = h.moa_streams()

print(len(streams))

# Reference ensembles: each wraps its own GaussianNB-based StratifiedBagging
# and uses five classifiers.
rea, cds, nie, ouse, kmc = (
    ensemble_cls(base_classifier=StratifiedBagging(base_estimator=GaussianNB(),
                                                   random_state=42),
                 number_of_classifiers=5)
    for ensemble_cls in (REA, LearnppCDS, LearnppNIE, OUSE, KMeanClustering)
)

# SEA variants: the sampler name goes to both the bagging base estimator and
# the SEA wrapper.
ros_knorau2, cnn_knorau2, ros_knorae2, cnn_knorae2 = (
    SEA(base_estimator=StratifiedBagging(base_estimator=GaussianNB(),
                                         random_state=42,
                                         oversampler=sampler),
        oversampled=sampler,
        des=des_name)
    for sampler, des_name in (
        ("ROS", "KNORAU2"),
        ("CNN", "KNORAU2"),
        ("ROS", "KNORAE2"),
        ("CNN", "KNORAE2"),
    )
)
Beispiel #16
0
from sklearn.neural_network import MLPClassifier

# The random state must be given as the single command-line argument.
if len(sys.argv) == 2:
    random_state = int(sys.argv[1])
else:
    print("PODAJ RS")
    exit()

print(random_state)

# Pick the streams to process and the methods to compare.
streams = h.toystreams(random_state)

print(len(streams))

# Same SEA / StratifiedBagging setup with two different base learners, each
# constructed with its default arguments.
gnb, ht = (
    SEA(base_estimator=StratifiedBagging(base_estimator=base_cls(),
                                         random_state=42))
    for base_cls in (GaussianNB, HoeffdingTree)
)

clfs = (gnb, ht)

# Define worker
def worker(i, stream_n):
    stream = streams[stream_n]
    cclfs = [clone(clf) for clf in clfs]

    print("Starting stream %i/%i" % (i + 1, len(streams)))

    eval = TestThenTrain(metrics=(
        balanced_accuracy_score,
        geometric_mean_score_1,
        f1_score,
Beispiel #17
0
from sklearn.base import clone
from sklearn.tree import DecisionTreeClassifier
from skmultiflow.trees import HoeffdingTree

# Pick the streams to process and the methods to compare.
streams = h.realstreams()
print(len(streams))

# Three online ensembles, each with 20 Hellinger-criterion Hoeffding trees.
ob, oob, uob = (
    ensemble_cls(n_estimators=20,
                 base_estimator=HoeffdingTree(split_criterion='hellinger'))
    for ensemble_cls in (OnlineBagging, OOB, UOB)
)

# SEA variants: the sampler name goes to both the bagging base estimator and
# the SEA wrapper.
ros_knorau2, cnn_knorau2, ros_knorae2 = (
    SEA(base_estimator=StratifiedBagging(
            base_estimator=HoeffdingTree(split_criterion='hellinger'),
            random_state=42,
            oversampler=sampler),
        oversampled=sampler,
        des=des_name)
    for sampler, des_name in (
        ("ROS", "KNORAU2"),
        ("CNN", "KNORAU2"),
        ("ROS", "KNORAE2"),
    )
)
from strlearn.evaluators import TestThenTrain
from sklearn.naive_bayes import GaussianNB
from strlearn.metrics import (balanced_accuracy_score, f1_score,
                              geometric_mean_score_1, precision, recall,
                              specificity)
import sys
from sklearn.base import clone
from sklearn.tree import DecisionTreeClassifier
from skmultiflow.trees import HoeffdingTree

# Pick the streams to process and the methods to compare.
streams = h.realstreams2()
print(len(streams))

# Reference ensembles: each wraps its own StratifiedBagging over a
# Hellinger-criterion HoeffdingTree and uses five classifiers.
rea, cds, nie, ouse = (
    ensemble_cls(base_classifier=StratifiedBagging(
                     base_estimator=HoeffdingTree(split_criterion='hellinger'),
                     random_state=42),
                 number_of_classifiers=5)
    for ensemble_cls in (REA, LearnppCDS, LearnppNIE, OUSE)
)
kmc = KMeanClustering(base_classifier=StratifiedBagging(