from array import array

import numpy as np

from skmultiflow.bayes import NaiveBayes
from skmultiflow.data.hyper_plane_generator import HyperplaneGenerator
from skmultiflow.meta import AccuracyWeightedEnsemble


def test_awe():
    # prepare the stream
    stream = HyperplaneGenerator(random_state=1)
    stream.prepare_for_use()

    # prepare the ensemble
    classifier = AccuracyWeightedEnsemble(n_estimators=5,
                                          n_kept_estimators=10,
                                          base_estimator=NaiveBayes(),
                                          window_size=200,
                                          n_splits=5)

    # test the classifier
    max_samples = 5000
    cnt = 0
    wait_samples = 100
    predictions = array('i')
    correct = 0
    while cnt < max_samples:
        X, y = stream.next_sample()
        pred = classifier.predict(X)
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(int(pred[0]))
        classifier.partial_fit(X, y)
        cnt += 1
        if pred[0] == y:
            correct += 1

    # assert model predictions
    expected_predictions = array('i', [
        0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0,
        1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1,
        1
    ])

    # assert model performance
    expected_accuracy = 0.875
    accuracy = correct / max_samples
    assert expected_accuracy == accuracy

    assert np.alltrue(predictions == expected_predictions)

    # assert model information
    expected_info = "AccuracyWeightedEnsemble: n_estimators: 5 - " \
                    "n_kept_estimators: 10 - " \
                    "base_estimator: NaiveBayes: nominal attributes: [] -  - " \
                    "window_size: 200 - " \
                    "n_splits: 5"
    assert classifier.get_info() == expected_info
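The loop above implements the test-then-train (prequential) protocol by hand. The same setup can also be driven by skmultiflow's EvaluatePrequential, which appears in the later examples on this page; a minimal sketch, with the evaluation parameters chosen here purely for illustration:

from skmultiflow.bayes import NaiveBayes
from skmultiflow.data.hyper_plane_generator import HyperplaneGenerator
from skmultiflow.evaluation.evaluate_prequential import EvaluatePrequential
from skmultiflow.meta import AccuracyWeightedEnsemble

stream = HyperplaneGenerator(random_state=1)
stream.prepare_for_use()
model = AccuracyWeightedEnsemble(n_estimators=5, base_estimator=NaiveBayes())

# Predict on each incoming sample, then train on it, reporting accuracy.
evaluator = EvaluatePrequential(max_samples=5000, metrics=['accuracy'])
evaluator.evaluate(stream=stream, model=model)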
Example no. 2
import os

import numpy as np

from skmultiflow.data.hyper_plane_generator import HyperplaneGenerator


def test_hyper_plane_generator(test_path):

    stream = HyperplaneGenerator(random_state=112,
                                 n_features=10,
                                 n_drift_features=2,
                                 mag_change=0.6,
                                 noise_percentage=0.28,
                                 sigma_percentage=0.1)
    stream.prepare_for_use()

    n_features = 10
    assert stream.n_remaining_samples() == -1

    expected_names = []
    for i in range(n_features):
        expected_names.append("att_num_" + str(i))
    assert stream.feature_names == expected_names

    assert stream.target_values == [0, 1]

    assert stream.target_names == ["target_0"]

    assert stream.n_features == n_features

    assert stream.n_cat_features == 0

    assert stream.n_targets == 1

    assert stream.get_data_info() == 'Hyperplane Generator - 1 targets, 2 classes, 10 features'

    assert stream.has_more_samples() is True

    assert stream.is_restartable() is True

    # Load test data corresponding to first 10 instances
    test_file = os.path.join(test_path, 'hyper_plane_stream.npz')
    data = np.load(test_file)
    X_expected = data['X']
    y_expected = data['y']

    X, y = stream.next_sample()
    assert np.alltrue(X[0] == X_expected[0])
    assert np.alltrue(y[0] == y_expected[0])

    X, y = stream.last_sample()
    assert np.alltrue(X[0] == X_expected[0])
    assert np.alltrue(y[0] == y_expected[0])

    stream.restart()
    X, y = stream.next_sample(10)
    assert np.alltrue(X == X_expected)
    assert np.alltrue(y == y_expected)

    assert stream.n_targets == np.array(y).ndim

    assert stream.n_features == X.shape[1]
Example no. 3
import os

import numpy as np

from skmultiflow.data.hyper_plane_generator import HyperplaneGenerator


def test_hyper_plane_generator(test_path):

    stream = HyperplaneGenerator(random_state=112,
                                 n_features=10,
                                 n_drift_features=2,
                                 mag_change=0.6,
                                 noise_percentage=0.28,
                                 sigma_percentage=0.1)
    stream.prepare_for_use()

    n_features = 10
    assert stream.n_remaining_samples() == -1

    expected_names = []
    for i in range(n_features):
        expected_names.append("att_num_" + str(i))
    assert stream.feature_names == expected_names

    assert stream.target_values == [0, 1]

    assert stream.target_names == ["target_0"]

    assert stream.n_features == n_features

    assert stream.n_cat_features == 0

    assert stream.n_targets == 1

    assert stream.get_data_info() == 'Hyperplane Generator - 1 target(s), 2 classes, 10 features'

    assert stream.has_more_samples() is True

    assert stream.is_restartable() is True

    # Load test data corresponding to first 10 instances
    test_file = os.path.join(test_path, 'hyper_plane_stream.npz')
    data = np.load(test_file)
    X_expected = data['X']
    y_expected = data['y']

    X, y = stream.next_sample()
    assert np.alltrue(X[0] == X_expected[0])
    assert np.alltrue(y[0] == y_expected[0])

    X, y = stream.last_sample()
    assert np.alltrue(X[0] == X_expected[0])
    assert np.alltrue(y[0] == y_expected[0])

    stream.restart()
    X, y = stream.next_sample(10)
    assert np.alltrue(X == X_expected)
    assert np.alltrue(y == y_expected)

    assert stream.n_targets == np.array(y).ndim

    assert stream.n_features == X.shape[1]

    assert 'stream' == stream._estimator_type

    expected_info = "HyperplaneGenerator(mag_change=0.6, n_drift_features=2, n_features=10,\n" \
                    "                    noise_percentage=0.28, random_state=112,\n" \
                    "                    sigma_percentage=0.1)"
    assert stream.get_info() == expected_info

    # Test calculation of the sum of weights, and of the weighted feature sums
    batch_size = 10
    n_features = 2
    stream = HyperplaneGenerator(random_state=112,
                                 n_features=n_features,
                                 n_drift_features=2,
                                 mag_change=0.6,
                                 noise_percentage=0.0,
                                 sigma_percentage=0.1)
    stream.prepare_for_use()

    # check features and weights
    X, y = stream.next_sample(batch_size)
    weights = stream._weights
    w = np.array([0.9750571288732851, 1.2403046199226442])
    data = np.array([[0.950016579405167, 0.07567720470206152],
                     [0.8327457625618593, 0.054805740282408255],
                     [0.8853514580727667, 0.7223465108072455],
                     [0.9811992777207516, 0.34341985076716164],
                     [0.39464258869483526, 0.004944924811720708],
                     [0.9558068694855607, 0.8206093713145775],
                     [0.378544457805313, 0.7847636149698817],
                     [0.5460739378008381, 0.1622260202888307],
                     [0.04500817232778065, 0.33218775732993966],
                     [0.8392114852107733, 0.7093616146129875]])

    assert np.alltrue(weights == w)
    assert np.alltrue(X == data)

    # check labels
    labels = np.zeros([1, batch_size])
    sum_weights = np.sum(weights)
    for i in range(batch_size):
        if weights[0] * data[i, 0] + weights[1] * data[i, 1] >= 0.5 * sum_weights:
            labels[0, i] = 1

    assert np.alltrue(y == labels)
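The loop above encodes the hyperplane labeling rule: a sample is labeled 1 when its weighted feature sum reaches half of the total weight mass. A vectorized equivalent, sketched with the weights and data arrays defined above:

    # Label rule: y_i = 1  iff  sum_j w_j * x_ij >= 0.5 * sum_j w_j
    labels_vec = (data @ weights >= 0.5 * np.sum(weights)).astype(int)
    assert np.alltrue(y == labels_vec)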
Example no. 4
from skmultiflow.data.hyper_plane_generator import HyperplaneGenerator
from ensemble import WeightedEnsembleClassifier
from skmultiflow.evaluation.evaluate_prequential import EvaluatePrequential

seed = 420

hyper_gen = HyperplaneGenerator(random_state=seed,
                                n_features=10,          # number of features to generate
                                n_drift_features=2,     # number of features involved in concept drift (k)
                                mag_change=0.0,         # magnitude of change (t)
                                noise_percentage=0.05,  # noise percentage (p)
                                sigma_percentage=0.1)   # probability that the direction of change is reversed (s_i)
hyper_gen.prepare_for_use()

evaluator = EvaluatePrequential(pretrain_size=1000, max_samples=20000, show_plot=True,
                                metrics=['accuracy', 'kappa'], output_file='result.csv',
                                batch_size=1000)

clf = WeightedEnsembleClassifier()

# Run the prequential evaluation
evaluator.evaluate(stream=hyper_gen, model=clf)


Example no. 5
import pandas as pd

from skmultiflow.data import (AGRAWALGenerator, ConceptDriftStream, DataStream,
                              HyperplaneGenerator, SineGenerator)

# RSLVQSgd, RSLVQAdadelta, RSLVQRMSprop, RSLVQAdam and custom_evaluation are
# project-local helpers; their imports are omitted in the original snippet.


def main():
    usedSynthData = [
        ["synthData/cess_data.csv", "synthData/cess_targets.csv"],
        ["synthData/move_square_data.csv", "synthData/move_square_targets.csv"],
        ["synthData/sea_data.csv", "synthData/sea_targets.csv"]
    ]

    # Names of the synthetic data streams
    synthDataStreams_names = [
        "Cess_data",
        "Move_squares",
        "Sea_data",
    ]

    realDataFiles = [
        ["realData/electric_data.csv", "realData/electric_targets.csv"],
        ["realData/poker_data.csv", "realData/poker_targets.csv"],
        ["realData/weather_data.csv", "realData/weather_targets.csv"],
        ["realData/rialto_data.csv", "realData/rialto_targets.csv"]
    ]

    # Names of the real data streams
    realDataStreams_names = ["Electric", "Poker", "Weather", "Rialto"]

    # Fix the poker dataset
    #dfX=pd.read_csv("realData/poker_data_broken.csv")
    #dfY=pd.read_csv(realTargetFiles[1])
    #print(dfX.dtypes)

    # Remove the faulty columns
    #dfX = dfX.drop(columns = ['feat_11', 'feat_12'])
    #print(dfX.dtypes)

    # Save the fixed data as CSV
    #dfX.to_csv(r'realData/poker_data.csv', index = None, header=True)

    # Check that it was saved correctly
    #X=pd.read_csv(realDataFiles[1])
    #print(X.dtypes)

    # Fix the electric dataset
    #dfX=pd.read_csv("realData/electric_data_broken.csv")
    #print(dfX.dtypes)

    # Remove the faulty columns
    #dfX = dfX.drop(columns = ['feat_1', 'feat_2'])
    #print(dfX.dtypes)
    #dfX.to_csv(r'realData/electric_data.csv', index = None, header=True)

    # Check that it was saved correctly
    #X=pd.read_csv(realDataFiles[0])
    #print(X.dtypes)

    # Streams with synthetic data from generators, synthetic data streams used in other works, and real data streams
    synthDataStreams = [
        [AGRAWALGenerator(random_state=112, perturbation=0.1), "Agrawal"],
        [
            ConceptDriftStream(stream=AGRAWALGenerator(random_state=112),
                               drift_stream=AGRAWALGenerator(random_state=112,
                                                             perturbation=0.1),
                               position=40000,
                               width=10000), "Agrawal_drift"
        ],
        [
            HyperplaneGenerator(mag_change=0.001, noise_percentage=0.1),
            "Hyperplane"
        ],
        [
            ConceptDriftStream(stream=HyperplaneGenerator(),
                               drift_stream=HyperplaneGenerator(),
                               position=40000,
                               width=10000), "Hyperplane_drift"
        ], [SineGenerator(random_state=112), "Sine"],
        [
            ConceptDriftStream(stream=SineGenerator(random_state=112),
                               drift_stream=SineGenerator(random_state=112),
                               position=40000,
                               width=10000), "Sine_drift"
        ]
    ]

    synthDataStreamsUsed = []
    for i in range(len(usedSynthData)):
        synthDataStreamsUsed.append([
            DataStream(pd.read_csv(usedSynthData[i][0]),
                       pd.read_csv(usedSynthData[i][1])),
            synthDataStreams_names[i]
        ])

    realDataStreams = []
    for i in range(len(realDataFiles)):
        realDataStreams.append([
            DataStream(pd.read_csv(realDataFiles[i][0]),
                       pd.read_csv(realDataFiles[i][1])),
            realDataStreams_names[i]
        ])

    clfs = [[RSLVQSgd(), 'RSLVQ_SGD'], [RSLVQAdadelta(), 'RSLVQ_Adadelta'],
            [RSLVQRMSprop(), 'RSLVQ_RMSprop'], [RSLVQAdam(), 'RSLVQ_Adam']]

    max_items = 40000

    # Insert the dataset array that should be evaluated. If the reform exception occurs, set the dataset
    # that is affected by it as the first one in the array and run again.
    for i in range(len(synthDataStreams)):
        for j in range(len(clfs)):
            #print('bla')
            #custom_evaluation(synthDataStreams[i], clfs[j], max_items, False)
            custom_evaluation(synthDataStreams[i], clfs[j], max_items, True)
Example no. 6
from skmultiflow.data import (AGRAWALGenerator, ConceptDriftStream,
                              HyperplaneGenerator, LEDGeneratorDrift,
                              RandomTreeGenerator, SEAGenerator)


# The RBF stream contains negative values, so it has to be left out for the Naive Bayes algorithm.
def init_standard_streams_naive_bayes():
    """Initialize standard data streams
    
    Standard streams are inspired by the experiment settings of 
    Gomes, Heitor Murilo & Bifet, Albert & Read, Jesse & Barddal, Jean Paul & 
    Enembreck, Fabrício & Pfahringer, Bernhard & Holmes, Geoff & 
    Abdessalem, Talel. (2017). Adaptive random forests for evolving data 
    stream classification. Machine Learning. 1-27. 10.1007/s10994-017-5642-8. 
    """
    agrawal_a = ConceptDriftStream(stream=AGRAWALGenerator(random_state=112,
                                                           perturbation=0.1),
                                   drift_stream=AGRAWALGenerator(
                                       random_state=112,
                                       classification_function=2,
                                       perturbation=0.1),
                                   random_state=None,
                                   alpha=90.0,
                                   position=21000000)
    agrawal_a.name = "agrawal_a"
    agrawal_g = ConceptDriftStream(stream=AGRAWALGenerator(random_state=112,
                                                           perturbation=0.1),
                                   drift_stream=AGRAWALGenerator(
                                       random_state=112,
                                       classification_function=1,
                                       perturbation=0.1),
                                   random_state=None,
                                   position=21000000,
                                   width=1000000)
    agrawal_g.name = "agrawal_g"
    hyper = HyperplaneGenerator(mag_change=0.001, noise_percentage=0.1)

    led_a = ConceptDriftStream(
        stream=LEDGeneratorDrift(has_noise=False,
                                 noise_percentage=0.0,
                                 n_drift_features=3),
        drift_stream=LEDGeneratorDrift(has_noise=False,
                                       noise_percentage=0.0,
                                       n_drift_features=7),
        random_state=None,
        alpha=90.0,  # angle of change, in degrees (0 - 90)
        position=21000000,
        width=1)

    led_a.name = "led_a"
    led_g = ConceptDriftStream(stream=LEDGeneratorDrift(has_noise=False,
                                                        noise_percentage=0.0,
                                                        n_drift_features=3),
                               drift_stream=LEDGeneratorDrift(
                                   has_noise=False,
                                   noise_percentage=0.0,
                                   n_drift_features=7),
                               random_state=None,
                               position=21000000,
                               width=1000000)
    led_g.name = "led_g"
    rand_tree = RandomTreeGenerator()
    rand_tree.name = "rand_tree"
    #rbf_if = RandomRBFGeneratorDrift(change_speed=0.001)
    #rbf_if.name = "rbf_if"
    #rbf_im = RandomRBFGeneratorDrift(change_speed=0.0001)
    #rbf_im.name = "rbf_im"
    sea_a = ConceptDriftStream(stream=SEAGenerator(random_state=112,
                                                   noise_percentage=0.1),
                               drift_stream=SEAGenerator(
                                   random_state=112,
                                   classification_function=2,
                                   noise_percentage=0.1),
                               alpha=90.0,
                               random_state=None,
                               position=21000000,
                               width=1)
    sea_a.name = "sea_a"
    sea_g = ConceptDriftStream(stream=SEAGenerator(random_state=112,
                                                   noise_percentage=0.1),
                               drift_stream=SEAGenerator(
                                   random_state=112,
                                   classification_function=1,
                                   noise_percentage=0.1),
                               random_state=None,
                               position=21000000,
                               width=1000000)
    sea_g.name = "sea_g"
    return [agrawal_a, agrawal_g, hyper, led_a, led_g, rand_tree, sea_a, sea_g]
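A minimal usage sketch for the returned stream list, mirroring the prequential setup of Example no. 4; the NaiveBayes learner and the evaluation parameters are illustrative assumptions, not part of the original snippet:

from skmultiflow.bayes import NaiveBayes
from skmultiflow.evaluation.evaluate_prequential import EvaluatePrequential

for stream in init_standard_streams_naive_bayes():
    stream.prepare_for_use()
    # Test-then-train evaluation of a Naive Bayes baseline on each stream.
    evaluator = EvaluatePrequential(max_samples=20000, metrics=['accuracy', 'kappa'])
    evaluator.evaluate(stream=stream, model=NaiveBayes())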