Example #1
    def _unit_test_params(cls):
        yield {
            "models": [
                linear_model.LogisticRegression(),
                tree.HoeffdingTreeClassifier(),
                naive_bayes.GaussianNB(),
            ]
        }
Example #2
def test_decision_tree_max_depth():

    model = tree.HoeffdingTreeClassifier()

    max_depths = [1, 2, 3, 4, 5, 6]
    models = utils.expand_param_grid(model, {"max_depth": max_depths})

    for model, max_depth in zip(models, max_depths):
        assert model.max_depth == max_depth
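As a side note, the grid passed to expand_param_grid may contain several hyperparameters at once, in which case the full Cartesian product is expanded; a minimal sketch (parameter values chosen arbitrarily for illustration):

grid = {"max_depth": [2, 4], "grace_period": [50, 200]}
models = utils.expand_param_grid(tree.HoeffdingTreeClassifier(), grid)
assert len(models) == 4  # 2 x 2 grid combinations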
Example #3
def test_class_splitter(dataset, splitter):
    model = tree.HoeffdingTreeClassifier(splitter=splitter,
                                         grace_period=10,
                                         leaf_prediction="mc",
                                         split_confidence=0.1)

    for x, y in dataset:
        model.learn_one(x, y)

    assert model.height > 0
Example #4
    def __init__(self):
        """Create a persistent model file if there isn't one. If one exists, use it."""

        self.file_path = 'models/decision_tree.joblib'
        self.include_hunger = False
        self.accuracy_metric_float = 0.0
        self.metrics = metrics.Accuracy()

        if path.exists(self.file_path):
            self.model = load(self.file_path)
        else:
            self.model = tree.HoeffdingTreeClassifier(grace_period=20)
            self.save_model()
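The save_model method called above is not shown in this snippet; a minimal sketch of what it presumably does, assuming joblib's dump as the counterpart of the load call:

    def save_model(self):
        """Persist the model to disk (hypothetical sketch, assuming joblib.dump)."""
        dump(self.model, self.file_path)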
Example #5
    def __init__(self, my_id=1, bootstrap_servers='', list_of_partitions=None, request_topic='', inference_topic='', group_id='my_grp'):
        """Constructor.

        :param my_id: id of this worker instance
        :param bootstrap_servers: Kafka bootstrap server list
        :param list_of_partitions: partitions of the request topic to consume
        :param request_topic: topic to read inference requests from
        :param inference_topic: topic to publish inference results to
        :param group_id: Kafka consumer group id
        """
        self.model = tree.HoeffdingTreeClassifier(max_depth=10)
        self.metric = metrics.ROCAUC()
        self.my_id = my_id
        self.t = request_topic
        self.result_t = inference_topic
        self.my_grp_id = group_id
        self.result_t_p = 8
        self.bootstrap_servers = bootstrap_servers

        self.tls = [TopicPartition(self.t, i) for i in (list_of_partitions or [])]
        print(self.tls)
        
        conf = {'bootstrap.servers': bootstrap_servers,
                'sasl.mechanism': 'PLAIN',
                'security.protocol': 'SASL_SSL',
                'ssl.ca.location': '/tmp/cacert.pem',
                'sasl.username': '******',
                'sasl.password': '******',
                'client.id': 'test-sw-1'}
        
        self.producer = Producer(conf)
        conf = {'bootstrap.servers': bootstrap_servers,
                'sasl.mechanism': 'PLAIN',
                'security.protocol': 'SASL_SSL',
                'sasl.username': '******',
                'sasl.password': '******',
                'ssl.ca.location': '/tmp/cacert.pem',
                'group.id': group_id,
                'auto.offset.reset': 'latest'}
        self.consumer = Consumer(conf)
        self.consumer.assign(self.tls)
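Neither the consume loop nor the produce path is shown in this snippet. A minimal sketch of how the model might be wired into Kafka, assuming JSON-encoded feature dicts and the standard confluent_kafka poll/produce API (the run_loop name and message format are assumptions, not part of the original):

    def run_loop(self):
        """Hypothetical consume-predict-produce loop (illustrative sketch)."""
        import json
        while True:
            msg = self.consumer.poll(timeout=1.0)
            if msg is None or msg.error():
                continue
            x = json.loads(msg.value())         # assumes a JSON feature dict
            y_pred = self.model.predict_one(x)  # score the incoming sample
            self.producer.produce(self.result_t,
                                  value=json.dumps({'prediction': y_pred}),
                                  partition=self.result_t_p)
            self.producer.flush()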
Example #6
def experiment_ht():
    """Runs experiments for Hoeffding Tree"""
    ht_l = []
    train_time_l = []
    test_time_l = []
    v_m_l = []
    s_m_l = []

    ht = tree.HoeffdingTreeClassifier(max_size=1000, grace_period=2)

    for i in range(X_train.shape[0]):
        X_t = X_r[i]
        y_t = y_r[i]

        idx = range(1024)
        X_t = dict(zip(idx, X_t))

        start_time = time.perf_counter()
        ht.learn_one(X_t, y_t)
        end_time = time.perf_counter()
        train_time_l.append(end_time - start_time)

        if (i + 1) % 100 == 0:
            p_t = 0.0
            start_time = time.perf_counter()
            for j in range(X_test.shape[0]):
                y_pred = ht.predict_one(dict(zip(idx, X_test[j])))  # river models expect dict inputs
                if y_pred == y_test[j]:
                    p_t += 1
            ht_l.append(p_t / X_test.shape[0])
            end_time = time.perf_counter()
            test_time_l.append(end_time - start_time)

            # Record memory usage percentages
            v_m_l.append(psutil.virtual_memory().percent)
            s_m_l.append(psutil.swap_memory().percent)

    # Accumulate per-sample train times into running totals, sampled every 100 steps
    new_train_time_l = []
    for i in range(1, X_train.shape[0]):
        train_time_l[i] += train_time_l[i - 1]
        if (i + 1) % 100 == 0:
            new_train_time_l.append(train_time_l[i])
    train_time_l = new_train_time_l

    return ht_l, train_time_l, test_time_l, v_m_l, s_m_l
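For comparison, river ships a helper that performs essentially this progressive (test-then-train) evaluation, minus the timing and memory bookkeeping; a minimal sketch on a synthetic stream:

from river import evaluate, metrics, synth, tree

model = tree.HoeffdingTreeClassifier(grace_period=2)
metric = metrics.Accuracy()
# Interleaved test-then-train over 1,000 synthetic SEA samples
print(evaluate.progressive_val_score(synth.SEA(seed=42).take(1000), model, metric))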
Example #7
     EvolutionaryOldestBaggingClassifier(population_size=POPULATION_SIZE,
                                         model=AUTOML_CLASSIFICATION_PIPELINE,
                                         param_grid=CLASSIFICATION_PARAM_GRID,
                                         sampling_rate=SAMPLING_RATE)),
    ('EvoAutoML Bagging Best',
     EvolutionaryBaggingClassifier(population_size=POPULATION_SIZE,
                                   model=AUTOML_CLASSIFICATION_PIPELINE,
                                   param_grid=CLASSIFICATION_PARAM_GRID,
                                   sampling_rate=SAMPLING_RATE)),
    ('ARF', ensemble.AdaptiveRandomForestClassifier()),
    ('Leveraging Bagging',
     ensemble.LeveragingBaggingClassifier(model=ENSEMBLE_CLASSIFIER())),
    ('Bagging',
     ensemble.BaggingClassifier(model=ENSEMBLE_CLASSIFIER(), n_models=10)),
    ('SRPC', ensemble.SRPClassifier(n_models=10)),
    ('Hoeffding Tree', tree.HoeffdingTreeClassifier()),
    ('Logistic Regression', linear_model.LogisticRegression()),
    ('HAT', tree.HoeffdingAdaptiveTreeClassifier()),
    ('GaussianNB', naive_bayes.GaussianNB()),
    ('KNN', neighbors.KNNClassifier()),
]

if __name__ == '__main__':

    RESULT_PATH.mkdir(parents=True, exist_ok=True)
    #output = evaluate_ensemble(CLASSIFICATION_TRACKS[1], ENSEMBLE_EVALUATION_MODELS[2])

    pool = Pool(60)  # Create a multiprocessing Pool
    output = pool.starmap(
        evaluate_ensemble,
        list(
"""Anomaly detection example for CPU, RAM, disk usage."""
from random import randint

import gradio as gr
from river import tree

LABELS = {True: 'Abnormal', False: 'Normal'}
# Use decision tree induction algorithm suitable for streaming data
MODEL = tree.HoeffdingTreeClassifier(max_depth=4)


def train_model(iterations: int = 50000) -> None:
    """Train on the assumption that all >50% and at least one >90% is an anomaly."""
    for _ in range(iterations):
        x = {metric: randint(1, 100) for metric in ['cpu', 'ram', 'disk']}
        y = LABELS[min(x.values()) > 50 and max(x.values()) > 90]
        MODEL.learn_one(x, y)


def predict_usage(cpu, ram, disk, is_abnormal):
    """Make the prediction and update with feedback."""
    x = {'cpu': cpu, 'ram': ram, 'disk': disk}
    result = MODEL.predict_proba_one(x), MODEL.debug_one(x)
    MODEL.learn_one(x, LABELS[is_abnormal], sample_weight=100)
    return result


def launch_interface():
    """Launch the Gradio interface."""
    cpu = gr.inputs.Slider(1, 100, 1, 30)
    ram = gr.inputs.Slider(1, 100, 1, 20)
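The snippet is truncated here. Independently of the Gradio wiring, the two functions above can be exercised directly; a short usage sketch (input values are arbitrary):

train_model(1000)
proba, debug = predict_usage(cpu=95, ram=60, disk=70, is_abnormal=True)
print(proba)  # class probabilities, e.g. {'Abnormal': ..., 'Normal': ...}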
Example #9
def get_classification_data():
    return synth.LED(seed=42).take(500)


def get_regression_data():
    return synth.Friedman(seed=42).take(500)


@pytest.mark.parametrize(
    "dataset, model",
    [
        (
            get_classification_data(),
            tree.HoeffdingTreeClassifier(
                leaf_prediction="mc",
                max_size=0.025,
                grace_period=50,
                memory_estimate_period=50,
                splitter=tree.splitter.ExhaustiveSplitter(),
            ),
        ),
        (
            get_classification_data(),
            tree.HoeffdingAdaptiveTreeClassifier(
                leaf_prediction="mc",
                max_size=0.025,
                grace_period=50,
                memory_estimate_period=50,
                splitter=tree.splitter.ExhaustiveSplitter(),
            ),
        ),
        (
Example #10
tracks = [
    ('Random RBF', random_rbf_track),
    ('AGRAWAL', agrawal_track),
    ('Anomaly Sine', anomaly_sine_track),
    ('Concept Drift', concept_drift_track),
    ('Hyperplane', hyperplane_track),
    ('Mixed', mixed_track),
    ('SEA', sea_track),
    ('Sine', sine_track),
    ('STAGGER', stagger_track)
]

estimator1 = compose.Pipeline(
    preprocessing.StandardScaler(),
    #feature_extraction.PolynomialExtender(),
    tree.HoeffdingTreeClassifier()
)

estimator2 = compose.Pipeline(
    pipelinehelper.PipelineHelperTransformer([
        ('scaler', preprocessing.StandardScaler())
    ]),
    #feature_extraction.PolynomialExtender(),
    ('classifier', tree.HoeffdingTreeClassifier())
)
estimator3 = compose.Pipeline(
    ('scaler', preprocessing.StandardScaler()),
    pipelinehelper.PipelineHelperClassifier([
        ('classifier', tree.HoeffdingTreeClassifier())
    ])
)
Example #11
def experiment(angle, classifiers, n_xor, n_rxor, n_test):
    """Perform XOR RXOR(XNOR) XOR experiment"""
    X_xor, y_xor = generate_gaussian_parity(n_xor)
    X_rxor, y_rxor = generate_gaussian_parity(n_rxor, angle_params=angle)
    X_xor_2, y_xor_2 = generate_gaussian_parity(n_xor)
    test_x_xor, test_y_xor = generate_gaussian_parity(n_test)
    test_x_rxor, test_y_rxor = generate_gaussian_parity(n_test,
                                                        angle_params=angle)
    X_stream = np.concatenate((X_xor, X_rxor, X_xor_2), axis=0)
    y_stream = np.concatenate((y_xor, y_rxor, y_xor_2), axis=0)

    # Instantiate classifiers
    if classifiers[0] == 1:
        ht = tree.HoeffdingTreeClassifier(grace_period=2,
                                          split_confidence=1e-01)
    if classifiers[1] == 1:
        mf = MondrianForestClassifier(n_estimators=10)
    if classifiers[2] == 1:
        sdt = DecisionTreeClassifier()
    if classifiers[3] == 1:
        sdf = StreamDecisionForest()
    if classifiers[4] == 1:
        synf = LifelongClassificationForest(default_n_estimators=10)

    errors = np.zeros((10, int(X_stream.shape[0] / 25)))

    for i in range(int(X_stream.shape[0] / 25)):
        X = X_stream[i * 25:(i + 1) * 25]
        y = y_stream[i * 25:(i + 1) * 25]

        # Hoeffding Tree Classifier
        if classifiers[0] == 1:
            ht_partial_fit(ht, X, y)
            ht_xor_y_hat, ht_rxor_y_hat = ht_predict(ht, test_x_xor,
                                                     test_x_rxor)
            errors[0, i] = 1 - np.mean(ht_xor_y_hat == test_y_xor)
            errors[1, i] = 1 - np.mean(ht_rxor_y_hat == test_y_rxor)

        # Mondrian Forest Classifier
        if classifiers[1] == 1:
            mf.partial_fit(X, y)
            mf_xor_y_hat = mf.predict(test_x_xor)
            mf_rxor_y_hat = mf.predict(test_x_rxor)
            errors[2, i] = 1 - np.mean(mf_xor_y_hat == test_y_xor)
            errors[3, i] = 1 - np.mean(mf_rxor_y_hat == test_y_rxor)

        # Stream Decision Tree Classifier
        if classifiers[2] == 1:
            sdt.partial_fit(X, y, classes=[0, 1])
            sdt_xor_y_hat = sdt.predict(test_x_xor)
            sdt_rxor_y_hat = sdt.predict(test_x_rxor)
            errors[4, i] = 1 - np.mean(sdt_xor_y_hat == test_y_xor)
            errors[5, i] = 1 - np.mean(sdt_rxor_y_hat == test_y_rxor)

        # Stream Decision Forest Classifier
        if classifiers[3] == 1:
            sdf.partial_fit(X, y, classes=[0, 1])
            sdf_xor_y_hat = sdf.predict(test_x_xor)
            sdf_rxor_y_hat = sdf.predict(test_x_rxor)
            errors[6, i] = 1 - np.mean(sdf_xor_y_hat == test_y_xor)
            errors[7, i] = 1 - np.mean(sdf_rxor_y_hat == test_y_rxor)

        # Synergistic Forest Classifier
        if classifiers[4] == 1:
            if i == 0:
                synf.add_task(X, y, n_estimators=10, task_id=0)
                synf_xor_y_hat = synf.predict(test_x_xor, task_id=0)
            elif i < (n_xor / 25):
                synf.update_task(X, y, task_id=0)
                synf_xor_y_hat = synf.predict(test_x_xor, task_id=0)
            elif i == (n_xor / 25):
                synf.add_task(X, y, n_estimators=10, task_id=1)
                synf_xor_y_hat = synf.predict(test_x_xor, task_id=0)
                synf_rxor_y_hat = synf.predict(test_x_rxor, task_id=1)
            elif i < (n_xor + n_rxor) / 25:
                synf.update_task(X, y, task_id=1)
                synf_xor_y_hat = synf.predict(test_x_xor, task_id=0)
                synf_rxor_y_hat = synf.predict(test_x_rxor, task_id=1)
            elif i < (2 * n_xor + n_rxor) / 25:
                synf.update_task(X, y, task_id=0)
                synf_xor_y_hat = synf.predict(test_x_xor, task_id=0)
                synf_rxor_y_hat = synf.predict(test_x_rxor, task_id=1)

            if i < (n_xor / 25):
                errors[8, i] = 1 - np.mean(synf_xor_y_hat == test_y_xor)
            if i >= (n_xor / 25):
                errors[8, i] = 1 - np.mean(synf_xor_y_hat == test_y_xor)
                errors[9, i] = 1 - np.mean(synf_rxor_y_hat == test_y_rxor)

    return errors
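The ht_partial_fit and ht_predict helpers used above are not part of this snippet. A plausible minimal sketch, assuming they merely adapt river's one-sample dict API to numpy batches (names and behavior inferred from the call sites):

def ht_partial_fit(ht, X, y):
    """Feed a numpy batch to the Hoeffding tree one sample at a time."""
    for xi, yi in zip(X, y):
        ht.learn_one(dict(enumerate(xi)), yi)


def ht_predict(ht, test_x_xor, test_x_rxor):
    """Predict labels for both test sets."""
    xor_y_hat = np.array([ht.predict_one(dict(enumerate(xi))) for xi in test_x_xor])
    rxor_y_hat = np.array([ht.predict_one(dict(enumerate(xi))) for xi in test_x_rxor])
    return xor_y_hat, rxor_y_hat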
Example #12
from river import synth
from river import evaluate
from river import metrics
from river import tree
from river import compose
from river import preprocessing
from river import linear_model

from tqdm import tqdm

hf_tree = tree.HoeffdingTreeClassifier(
    grace_period=100,
    split_confidence=1e-5,
)

model = compose.Pipeline()
model |= hf_tree

for index, raw in tqdm(train_features.iterrows(),
                       total=train_features.shape[0]):
    model.learn_one(raw, train_target[index])

correct_cnt = 0

for index, raw in tqdm(test_features.iterrows(), total=test_features.shape[0]):
    test_pred = model.predict_one(raw)
    if test_pred == test_target[index]:
        correct_cnt += 1
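The snippet ends with the raw count; the held-out accuracy would presumably be reported as:

print(f"Accuracy: {correct_cnt / test_features.shape[0]:.4f}")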