def __init__(self, concept_id=0, seed=None, noise=0):
    """Build an AGRAWAL generator stream and pass it to the parent class.

    Parameters
    ----------
    concept_id
        Given to ``AGRAWALGenerator`` as its first positional argument and
        remembered on ``self.cf``.
    seed
        Forwarded as ``random_state`` and remembered on ``self.seed``.
    noise
        Forwarded as ``perturbation``.
    """
    self.cf, self.seed = concept_id, seed
    generator = AGRAWALGenerator(
        concept_id,
        random_state=seed,
        perturbation=noise,
    )
    generator.prepare_for_use()
    # The parent initializer receives the already-prepared generator.
    super().__init__(generator)
Example #2
0
def test_agrawal_generator_all_functions(test_path):
    """Compare 10 samples per classification function with stored data.

    For every classification function index in ``range(10)`` a generator
    seeded with ``random_state=1`` is created, 10 samples are drawn, and
    they are compared element-wise against the arrays ``X`` and ``y``
    stored in ``agrawal_stream_<f>.npz`` under ``test_path``.

    Parameters
    ----------
    test_path
        Directory that holds the reference ``.npz`` files.
    """
    for f in range(10):
        stream = AGRAWALGenerator(classification_function=f, random_state=1)

        # Load test data corresponding to first 10 instances.
        test_file = os.path.join(test_path, 'agrawal_stream_{}.npz'.format(f))
        # The context manager closes the underlying archive once the arrays
        # have been read, instead of leaking the file handle.
        with np.load(test_file) as data:
            X_expected = data['X']
            y_expected = data['y']

        X, y = stream.next_sample(10)
        # np.all is used because np.alltrue was removed in NumPy 2.0.
        assert np.all(X == X_expected)
        assert np.all(y == y_expected)
Example #3
0
def test_agrawal_drift(test_path):
    """Compare samples drawn before and after a drift with stored data.

    Ten samples are drawn, ``generate_drift()`` is called, ten more samples
    are drawn, and the concatenation of both batches is compared
    element-wise against the arrays ``X`` and ``y`` stored in
    ``agrawal_stream_drift.npz`` under ``test_path``.

    Parameters
    ----------
    test_path
        Directory that holds the reference ``.npz`` file.
    """
    stream = AGRAWALGenerator(random_state=1)
    stream.prepare_for_use()
    X, y = stream.next_sample(10)
    stream.generate_drift()
    X_drift, y_drift = stream.next_sample(10)

    # Load the stored reference data for both batches.
    test_file = os.path.join(test_path, 'agrawal_stream_drift.npz')
    # The context manager closes the underlying archive once the arrays
    # have been read, instead of leaking the file handle.
    with np.load(test_file) as data:
        X_expected = data['X']
        y_expected = data['y']

    X = np.concatenate((X, X_drift))
    y = np.concatenate((y, y_drift))
    # np.all is used because np.alltrue was removed in NumPy 2.0.
    assert np.all(X == X_expected)
    assert np.all(y == y_expected)
Example #4
0
def test_agrawal_generator(test_path):
    """Check metadata, sampling, restart and info string of the generator.

    A generator configured with ``classification_function=2``,
    ``random_state=112``, ``balance_classes=False`` and
    ``perturbation=0.28`` is inspected for its reported properties, then
    its samples are compared element-wise against the arrays ``X`` and
    ``y`` stored in ``agrawal_stream.npz`` under ``test_path``.

    Parameters
    ----------
    test_path
        Directory that holds the reference ``.npz`` file.
    """
    stream = AGRAWALGenerator(classification_function=2,
                              random_state=112,
                              balance_classes=False,
                              perturbation=0.28)

    assert stream.n_remaining_samples() == -1

    expected_names = [
        "salary", "commission", "age", "elevel", "car", "zipcode", "hvalue",
        "hyears", "loan"
    ]
    assert stream.feature_names == expected_names

    expected_targets = [0, 1]
    assert stream.target_values == expected_targets

    assert stream.target_names == ['target']

    assert stream.n_features == 9

    assert stream.n_cat_features == 3

    assert stream.n_num_features == 6

    assert stream.n_targets == 1

    assert stream.get_data_info(
    ) == 'AGRAWAL Generator - 1 target(s), 2 classes, 9 features'

    assert stream.has_more_samples() is True

    assert stream.is_restartable() is True

    # Load test data corresponding to first 10 instances.
    test_file = os.path.join(test_path, 'agrawal_stream.npz')
    # The context manager closes the underlying archive once the arrays
    # have been read, instead of leaking the file handle.
    with np.load(test_file) as data:
        X_expected = data['X']
        y_expected = data['y']

    # np.all is used below because np.alltrue was removed in NumPy 2.0.
    X, y = stream.next_sample()
    assert np.all(X[0] == X_expected[0])
    assert np.all(y[0] == y_expected[0])

    # last_sample() is compared against the same first reference row.
    X, y = stream.last_sample()
    assert np.all(X[0] == X_expected[0])
    assert np.all(y[0] == y_expected[0])

    # After a restart the first 10 samples must match the reference again.
    stream.restart()
    X, y = stream.next_sample(10)
    assert np.all(X == X_expected)
    assert np.all(y == y_expected)

    assert stream.n_targets == np.array(y).ndim

    assert stream.n_features == X.shape[1]

    assert 'stream' == stream._estimator_type

    expected_info = "AGRAWALGenerator(balance_classes=False, classification_function=2,\n" \
                    "                 perturbation=0.28, random_state=112)"
    assert stream.get_info() == expected_info
Example #5
0
def main():
    """Run ``custom_evaluation`` for every RSLVQ variant on each stream.

    Three families of streams are built: streams produced by generators,
    synthetic CSV streams that were used in other works, and real-world
    CSV streams. Only the generator-based family is evaluated by the loop
    at the end; the other two lists are built so they can be swapped in.
    """
    # [data_csv, targets_csv] pairs of the synthetic CSV streams.
    used_synth_files = [
        ["synthData/cess_data.csv", "synthData/cess_targets.csv"],
        ["synthData/move_square_data.csv",
         "synthData/move_square_targets.csv"],
        ["synthData/sea_data.csv", "synthData/sea_targets.csv"],
    ]
    # Display names, in the same order as used_synth_files.
    used_synth_names = ["Cess_data", "Move_squares", "Sea_data"]

    # [data_csv, targets_csv] pairs of the real-world streams.
    real_files = [
        ["realData/electric_data.csv", "realData/electric_targets.csv"],
        ["realData/poker_data.csv", "realData/poker_targets.csv"],
        ["realData/weather_data.csv", "realData/weather_targets.csv"],
        ["realData/rialto_data.csv", "realData/rialto_targets.csv"],
    ]
    # Display names, in the same order as real_files.
    real_names = ["Electric", "Poker", "Weather", "Rialto"]

    # NOTE: the poker and electric CSVs were repaired once from
    # realData/poker_data_broken.csv (dropping columns feat_11, feat_12)
    # and realData/electric_data_broken.csv (dropping columns feat_1,
    # feat_2), then written back with index=None, header=True.

    # Generator-based streams, each paired with its display name.
    drift_window = dict(position=40000, width=10000)
    generated_streams = [
        [AGRAWALGenerator(random_state=112, perturbation=0.1), "Agrawal"],
        [
            ConceptDriftStream(
                stream=AGRAWALGenerator(random_state=112),
                drift_stream=AGRAWALGenerator(random_state=112,
                                              perturbation=0.1),
                **drift_window),
            "Agrawal_drift",
        ],
        [
            HyperplaneGenerator(mag_change=0.001, noise_percentage=0.1),
            "Hyperplane",
        ],
        [
            ConceptDriftStream(stream=HyperplaneGenerator(),
                               drift_stream=HyperplaneGenerator(),
                               **drift_window),
            "Hyperplane_drift",
        ],
        [SineGenerator(random_state=112), "Sine"],
        [
            ConceptDriftStream(stream=SineGenerator(random_state=112),
                               drift_stream=SineGenerator(random_state=112),
                               **drift_window),
            "Sine_drift",
        ],
    ]

    # CSV-backed streams; not evaluated below but available to swap in.
    used_synth_streams = [
        [DataStream(pd.read_csv(data_csv), pd.read_csv(targets_csv)), label]
        for (data_csv, targets_csv), label in zip(used_synth_files,
                                                  used_synth_names)
    ]
    real_streams = [
        [DataStream(pd.read_csv(data_csv), pd.read_csv(targets_csv)), label]
        for (data_csv, targets_csv), label in zip(real_files, real_names)
    ]

    # Classifier instances paired with their display names.
    classifiers = [
        [RSLVQSgd(), 'RSLVQ_SGD'],
        [RSLVQAdadelta(), 'RSLVQ_Adadelta'],
        [RSLVQRMSprop(), 'RSLVQ_RMSprop'],
        [RSLVQAdam(), 'RSLVQ_Adam'],
    ]

    max_items = 40000

    # Put the stream list to evaluate here. If the reform exception occurs,
    # move the affected dataset to the front of the list and run again.
    for stream_entry in generated_streams:
        for clf_entry in classifiers:
            custom_evaluation(stream_entry, clf_entry, max_items, True)
Example #6
0
def init_standard_streams_naive_bayes():
    """Initialize the standard data streams usable with Naive Bayes.

    The RBF stream contains negative values, so it must be omitted for the
    Naive Bayes algorithm; the ``RandomRBFGeneratorDrift`` streams
    (``rbf_if`` with ``change_speed=0.001`` and ``rbf_im`` with
    ``change_speed=0.0001``) are therefore not created here.

    Standard streams are inspired by the experiment settings of
    Gomes, Heitor Murilo & Bifet, Albert & Read, Jesse & Barddal, Jean Paul &
    Enembreck, Fabrício & Pfahringer, Bernhard & Holmes, Geoff &
    Abdessalem, Talel. (2017). Adaptive random forests for evolving data
    stream classification. Machine Learning. 1-27. 10.1007/s10994-017-5642-8.

    Returns
    -------
    list
        ``[agrawal_a, agrawal_g, hyper, led_a, led_g, rand_tree, sea_a,
        sea_g]`` in that order.
    """
    drift_position = 21000000
    gradual_width = 1000000

    # Keyword arguments shared by both generators of each drift pair.
    agrawal_args = dict(random_state=112, perturbation=0.1)
    led_args = dict(has_noise=False, noise_percentage=0.0)
    sea_args = dict(random_state=112, noise_percentage=0.1)

    # --- AGRAWAL ---------------------------------------------------------
    agrawal_a = ConceptDriftStream(
        stream=AGRAWALGenerator(**agrawal_args),
        drift_stream=AGRAWALGenerator(classification_function=2,
                                      **agrawal_args),
        random_state=None,
        alpha=90.0,
        position=drift_position,
    )
    agrawal_a.name = "agrawal_a"

    agrawal_g = ConceptDriftStream(
        stream=AGRAWALGenerator(**agrawal_args),
        drift_stream=AGRAWALGenerator(classification_function=1,
                                      **agrawal_args),
        random_state=None,
        position=drift_position,
        width=gradual_width,
    )
    agrawal_g.name = "agrawal_g"

    # --- Hyperplane ------------------------------------------------------
    # NOTE(review): unlike every other stream here, no ``name`` is assigned
    # to this one - confirm whether that is intentional.
    hyper = HyperplaneGenerator(mag_change=0.001, noise_percentage=0.1)

    # --- LED -------------------------------------------------------------
    led_a = ConceptDriftStream(
        stream=LEDGeneratorDrift(n_drift_features=3, **led_args),
        drift_stream=LEDGeneratorDrift(n_drift_features=7, **led_args),
        random_state=None,
        alpha=90.0,  # angle of change, in degrees (0 - 90)
        position=drift_position,
        width=1,
    )
    led_a.name = "led_a"

    led_g = ConceptDriftStream(
        stream=LEDGeneratorDrift(n_drift_features=3, **led_args),
        drift_stream=LEDGeneratorDrift(n_drift_features=7, **led_args),
        random_state=None,
        position=drift_position,
        width=gradual_width,
    )
    led_g.name = "led_g"

    # --- Random tree -----------------------------------------------------
    rand_tree = RandomTreeGenerator()
    rand_tree.name = "rand_tree"

    # --- SEA -------------------------------------------------------------
    sea_a = ConceptDriftStream(
        stream=SEAGenerator(**sea_args),
        drift_stream=SEAGenerator(classification_function=2, **sea_args),
        alpha=90.0,
        random_state=None,
        position=drift_position,
        width=1,
    )
    sea_a.name = "sea_a"

    sea_g = ConceptDriftStream(
        stream=SEAGenerator(**sea_args),
        drift_stream=SEAGenerator(classification_function=1, **sea_args),
        random_state=None,
        position=drift_position,
        width=gradual_width,
    )
    sea_g.name = "sea_g"

    return [agrawal_a, agrawal_g, hyper, led_a, led_g, rand_tree, sea_a, sea_g]