def __init__(self, concept_id=0, seed=None, noise=0):
    self.cf = concept_id
    self.seed = seed
    stream = AGRAWALGenerator(concept_id, random_state=seed, perturbation=noise)
    stream.prepare_for_use()
    super().__init__(stream)
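# For context: a minimal, self-contained sketch of the stream that the wrapper
# above builds. The wrapper's class and its base class are not shown in this
# excerpt, so this only demonstrates the underlying scikit-multiflow generator;
# the helper name and parameter values are illustrative assumptions.
from skmultiflow.data import AGRAWALGenerator


def demo_wrapped_agrawal(concept_id=0, seed=42, noise=0.05):
    stream = AGRAWALGenerator(classification_function=concept_id,
                              random_state=seed,
                              perturbation=noise)
    stream.prepare_for_use()  # required by the scikit-multiflow version used here
    X, y = stream.next_sample(5)  # draw five labelled instances
    return X, y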
def test_agrawal_generator_all_functions(test_path):
    for f in range(10):
        stream = AGRAWALGenerator(classification_function=f, random_state=1)

        # Load test data corresponding to first 10 instances
        test_file = os.path.join(test_path, 'agrawal_stream_{}.npz'.format(f))
        data = np.load(test_file)
        X_expected = data['X']
        y_expected = data['y']

        X, y = stream.next_sample(10)
        assert np.alltrue(X == X_expected)
        assert np.alltrue(y == y_expected)
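# A sketch of how the fixture files read above could be regenerated. This is an
# assumption about how the .npz files were produced (it is not part of the
# original test suite); it simply mirrors the generator settings the test uses.
import os

import numpy as np
from skmultiflow.data import AGRAWALGenerator


def regenerate_agrawal_fixtures(test_path):
    for f in range(10):
        stream = AGRAWALGenerator(classification_function=f, random_state=1)
        X, y = stream.next_sample(10)
        np.savez(os.path.join(test_path, 'agrawal_stream_{}.npz'.format(f)),
                 X=X, y=y)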
def test_agrawal_drift(test_path):
    stream = AGRAWALGenerator(random_state=1)
    stream.prepare_for_use()
    X, y = stream.next_sample(10)
    stream.generate_drift()
    X_drift, y_drift = stream.next_sample(10)

    # Load test data corresponding to the 10 instances before and the
    # 10 instances after the drift
    test_file = os.path.join(test_path, 'agrawal_stream_drift.npz')
    data = np.load(test_file)
    X_expected = data['X']
    y_expected = data['y']

    X = np.concatenate((X, X_drift))
    y = np.concatenate((y, y_drift))
    assert np.alltrue(X == X_expected)
    assert np.alltrue(y == y_expected)
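# As above, a sketch (an assumption, not part of the original test suite) of how
# the drift fixture could be regenerated with the same settings the test uses.
def regenerate_agrawal_drift_fixture(test_path):
    stream = AGRAWALGenerator(random_state=1)
    stream.prepare_for_use()
    X, y = stream.next_sample(10)
    stream.generate_drift()  # switch to a new, randomly chosen classification function
    X_drift, y_drift = stream.next_sample(10)
    np.savez(os.path.join(test_path, 'agrawal_stream_drift.npz'),
             X=np.concatenate((X, X_drift)),
             y=np.concatenate((y, y_drift)))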
def test_agrawal_generator(test_path):
    stream = AGRAWALGenerator(classification_function=2, random_state=112,
                              balance_classes=False, perturbation=0.28)

    assert stream.n_remaining_samples() == -1

    expected_names = ["salary", "commission", "age", "elevel",
                      "car", "zipcode", "hvalue", "hyears", "loan"]
    assert stream.feature_names == expected_names

    expected_targets = [0, 1]
    assert stream.target_values == expected_targets
    assert stream.target_names == ['target']

    assert stream.n_features == 9
    assert stream.n_cat_features == 3
    assert stream.n_num_features == 6
    assert stream.n_targets == 1

    assert stream.get_data_info() == 'AGRAWAL Generator - 1 target(s), 2 classes, 9 features'

    assert stream.has_more_samples() is True
    assert stream.is_restartable() is True

    # Load test data corresponding to first 10 instances
    test_file = os.path.join(test_path, 'agrawal_stream.npz')
    data = np.load(test_file)
    X_expected = data['X']
    y_expected = data['y']

    X, y = stream.next_sample()
    assert np.alltrue(X[0] == X_expected[0])
    assert np.alltrue(y[0] == y_expected[0])

    X, y = stream.last_sample()
    assert np.alltrue(X[0] == X_expected[0])
    assert np.alltrue(y[0] == y_expected[0])

    stream.restart()
    X, y = stream.next_sample(10)
    assert np.alltrue(X == X_expected)
    assert np.alltrue(y == y_expected)

    assert stream.n_targets == np.array(y).ndim
    assert stream.n_features == X.shape[1]

    assert 'stream' == stream._estimator_type

    expected_info = "AGRAWALGenerator(balance_classes=False, classification_function=2,\n" \
                    "                 perturbation=0.28, random_state=112)"
    assert stream.get_info() == expected_info
def main():
    # Synthetic data streams that were used in other works
    usedSynthData = [
        ["synthData/cess_data.csv", "synthData/cess_targets.csv"],
        ["synthData/move_square_data.csv", "synthData/move_square_targets.csv"],
        ["synthData/sea_data.csv", "synthData/sea_targets.csv"]
    ]
    # Names of the data streams
    synthDataStreams_names = ["Cess_data", "Move_squares", "Sea_data"]

    realDataFiles = [
        ["realData/electric_data.csv", "realData/electric_targets.csv"],
        ["realData/poker_data.csv", "realData/poker_targets.csv"],
        ["realData/weather_data.csv", "realData/weather_targets.csv"],
        ["realData/rialto_data.csv", "realData/rialto_targets.csv"]
    ]
    # Names of the data streams
    realDataStreams_names = ["Electric", "Poker", "Weather", "Rialto"]

    # Fix the poker dataset
    #dfX = pd.read_csv("realData/poker_data_broken.csv")
    #dfY = pd.read_csv(realTargetFiles[1])
    #print(dfX.dtypes)
    # Remove the faulty columns
    #dfX = dfX.drop(columns=['feat_11', 'feat_12'])
    #print(dfX.dtypes)
    # Save the fixed data as csv
    #dfX.to_csv(r'realData/poker_data.csv', index=None, header=True)
    # Check if saved correctly
    #X = pd.read_csv(realDataFiles[1])
    #print(X.dtypes)

    # Fix the electric dataset
    #dfX = pd.read_csv("realData/electric_data_broken.csv")
    #print(dfX.dtypes)
    # Remove the faulty columns
    #dfX = dfX.drop(columns=['feat_1', 'feat_2'])
    #print(dfX.dtypes)
    #dfX.to_csv(r'realData/electric_data.csv', index=None, header=True)
    # Check if saved correctly
    #X = pd.read_csv(realDataFiles[0])
    #print(X.dtypes)

    # Streams with synthetic data from generators, synthetic data streams that
    # were used in other works, and real data streams
    synthDataStreams = [
        [AGRAWALGenerator(random_state=112, perturbation=0.1), "Agrawal"],
        [ConceptDriftStream(stream=AGRAWALGenerator(random_state=112),
                            drift_stream=AGRAWALGenerator(random_state=112,
                                                          perturbation=0.1),
                            position=40000, width=10000), "Agrawal_drift"],
        [HyperplaneGenerator(mag_change=0.001, noise_percentage=0.1), "Hyperplane"],
        [ConceptDriftStream(stream=HyperplaneGenerator(),
                            drift_stream=HyperplaneGenerator(),
                            position=40000, width=10000), "Hyperplane_drift"],
        [SineGenerator(random_state=112), "Sine"],
        [ConceptDriftStream(stream=SineGenerator(random_state=112),
                            drift_stream=SineGenerator(random_state=112),
                            position=40000, width=10000), "Sine_drift"]
    ]

    synthDataStreamsUsed = []
    for i in range(len(usedSynthData)):
        synthDataStreamsUsed.append([
            DataStream(pd.read_csv(usedSynthData[i][0]),
                       pd.read_csv(usedSynthData[i][1])),
            synthDataStreams_names[i]
        ])

    realDataStreams = []
    for i in range(len(realDataFiles)):
        realDataStreams.append([
            DataStream(pd.read_csv(realDataFiles[i][0]),
                       pd.read_csv(realDataFiles[i][1])),
            realDataStreams_names[i]
        ])

    clfs = [[RSLVQSgd(), 'RSLVQ_SGD'],
            [RSLVQAdadelta(), 'RSLVQ_Adadelta'],
            [RSLVQRMSprop(), 'RSLVQ_RMSprop'],
            [RSLVQAdam(), 'RSLVQ_Adam']]

    max_items = 40000

    # Insert the dataset array that should be evaluated. If the reform exception
    # occurs, set the dataset that is affected by it as the first one in the
    # array and run again.
    for i in range(len(synthDataStreams)):
        for j in range(len(clfs)):
            #custom_evaluation(synthDataStreams[i], clfs[j], max_items, False)
            custom_evaluation(synthDataStreams[i], clfs[j], max_items, True)
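# custom_evaluation() is not defined in this excerpt. Below is a minimal sketch
# of what it might look like, assuming it wraps scikit-multiflow's prequential
# evaluation; the name, signature, metric choice and output file pattern are
# assumptions, not the original implementation.
from skmultiflow.evaluation import EvaluatePrequential


def custom_evaluation_sketch(stream_with_name, clf_with_name, max_items, show_plot):
    stream, stream_name = stream_with_name
    clf, clf_name = clf_with_name
    evaluator = EvaluatePrequential(max_samples=max_items,
                                    metrics=['accuracy', 'kappa'],
                                    show_plot=show_plot,
                                    output_file='{}_{}.csv'.format(stream_name, clf_name))
    evaluator.evaluate(stream=stream, model=clf, model_names=[clf_name])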
def init_standard_streams_naive_bayes():
    # The RBF streams contain negative values, so they have to be left out for
    # the Naive Bayes algorithm.
    """Initialize standard data streams

    Standard streams are inspired by the experiment settings of
    Gomes, Heitor Murilo & Bifet, Albert & Read, Jesse & Barddal, Jean Paul &
    Enembreck, Fabrício & Pfahringer, Bernhard & Holmes, Geoff & Abdessalem, Talel.
    (2017). Adaptive random forests for evolving data stream classification.
    Machine Learning. 1-27. 10.1007/s10994-017-5642-8.
    """
    agrawal_a = ConceptDriftStream(stream=AGRAWALGenerator(random_state=112,
                                                           perturbation=0.1),
                                   drift_stream=AGRAWALGenerator(random_state=112,
                                                                 classification_function=2,
                                                                 perturbation=0.1),
                                   random_state=None,
                                   alpha=90.0,
                                   position=21000000)
    agrawal_a.name = "agrawal_a"

    agrawal_g = ConceptDriftStream(stream=AGRAWALGenerator(random_state=112,
                                                           perturbation=0.1),
                                   drift_stream=AGRAWALGenerator(random_state=112,
                                                                 classification_function=1,
                                                                 perturbation=0.1),
                                   random_state=None,
                                   position=21000000,
                                   width=1000000)
    agrawal_g.name = "agrawal_g"

    hyper = HyperplaneGenerator(mag_change=0.001, noise_percentage=0.1)

    led_a = ConceptDriftStream(stream=LEDGeneratorDrift(has_noise=False,
                                                        noise_percentage=0.0,
                                                        n_drift_features=3),
                               drift_stream=LEDGeneratorDrift(has_noise=False,
                                                              noise_percentage=0.0,
                                                              n_drift_features=7),
                               random_state=None,
                               alpha=90.0,  # angle of change in degrees (0 - 90)
                               position=21000000,
                               width=1)
    led_a.name = "led_a"

    led_g = ConceptDriftStream(stream=LEDGeneratorDrift(has_noise=False,
                                                        noise_percentage=0.0,
                                                        n_drift_features=3),
                               drift_stream=LEDGeneratorDrift(has_noise=False,
                                                              noise_percentage=0.0,
                                                              n_drift_features=7),
                               random_state=None,
                               position=21000000,
                               width=1000000)
    led_g.name = "led_g"

    rand_tree = RandomTreeGenerator()
    rand_tree.name = "rand_tree"

    #rbf_if = RandomRBFGeneratorDrift(change_speed=0.001)
    #rbf_if.name = "rbf_if"
    #rbf_im = RandomRBFGeneratorDrift(change_speed=0.0001)
    #rbf_im.name = "rbf_im"

    sea_a = ConceptDriftStream(stream=SEAGenerator(random_state=112,
                                                   noise_percentage=0.1),
                               drift_stream=SEAGenerator(random_state=112,
                                                         classification_function=2,
                                                         noise_percentage=0.1),
                               alpha=90.0,
                               random_state=None,
                               position=21000000,
                               width=1)
    sea_a.name = "sea_a"

    sea_g = ConceptDriftStream(stream=SEAGenerator(random_state=112,
                                                   noise_percentage=0.1),
                               drift_stream=SEAGenerator(random_state=112,
                                                         classification_function=1,
                                                         noise_percentage=0.1),
                               random_state=None,
                               position=21000000,
                               width=1000000)
    sea_g.name = "sea_g"

    return [agrawal_a, agrawal_g, hyper, led_a, led_g, rand_tree, sea_a, sea_g]
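# Example usage (not part of the original module) of the streams returned above,
# e.g. with scikit-multiflow's Naive Bayes learner; the learner choice and the
# sample budget are illustrative assumptions.
from skmultiflow.bayes import NaiveBayes
from skmultiflow.evaluation import EvaluatePrequential


def run_naive_bayes_on_standard_streams(max_samples=100000):
    for stream in init_standard_streams_naive_bayes():
        evaluator = EvaluatePrequential(max_samples=max_samples, metrics=['accuracy'])
        evaluator.evaluate(stream=stream, model=NaiveBayes(),
                           model_names=[getattr(stream, 'name', type(stream).__name__)])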