def test_sam_knn(package_path):
    """Regression-test SAMKNN predictions on the bundled SEA stream.

    The learner is configured with distance weighting, the ``maxACCApprox``
    STM size option and the long-term memory switched off. Every
    ``wait_samples``-th sample (except the very first one) is predicted
    *before* the learner is trained on it, and the collected predictions are
    compared against a stored reference sequence.

    Parameters
    ----------
    package_path : str
        Root directory that contains ``src/skmultiflow/data/datasets``.
    """
    test_file = os.path.join(package_path, 'src/skmultiflow/data/datasets/sea_big.csv')
    stream = FileStream(test_file)
    stream.prepare_for_use()

    learner = SAMKNN(n_neighbors=5,
                     max_window_size=1000,
                     weighting='distance',
                     stm_size_option='maxACCApprox',
                     use_ltm=False)

    max_samples = 5000
    wait_samples = 100
    predictions = array('d')

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples; the prediction is taken before the learner
        # has seen the sample it is being asked about.
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('d', [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0,
                                       0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0,
                                       1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0,
                                       0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0,
                                       0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0])

    # np.alltrue was a deprecated alias and is gone in NumPy 2.0; np.all is
    # the supported spelling. Comparing two array.array objects yields a
    # single bool, which np.all passes through unchanged.
    assert np.all(predictions == expected_predictions)
def test_grid():
    """Cross-validate four classifiers on two LED concept-drift streams.

    Both streams switch from an LED generator with 3 drifting features to
    one with 7 drifting features at position 250000; they differ only in
    the ``width`` passed to ``ConceptDriftStream`` (1 and 50000).
    """
    def led_drift_stream(width):
        # Build the base generator first and the drift generator second,
        # then wrap both in the drift stream.
        base = LEDGeneratorDrift(has_noise=False,
                                 noise_percentage=0.0,
                                 n_drift_features=3)
        drifted = LEDGeneratorDrift(has_noise=False,
                                    noise_percentage=0.0,
                                    n_drift_features=7)
        return ConceptDriftStream(stream=base,
                                  drift_stream=drifted,
                                  random_state=None,
                                  alpha=90.0,  # angle of change grade 0 - 90
                                  position=250000,
                                  width=width)

    clfs = [
        OzaBagging(base_estimator=KNN()),
        OzaBaggingAdwin(base_estimator=KNN()),
        AdaptiveRandomForest(),
        SAMKNN(),
    ]
    cv = CrossValidation(clfs=clfs, max_samples=1000000, test_size=1)

    first_stream = led_drift_stream(1)
    second_stream = led_drift_stream(50000)
    cv.streams = [first_stream, second_stream]

    cv.test()
    cv.save_summary()
def test_sam_knn_coverage(package_path):
    """Regression-test SAMKNN with a small window and long-term memory on.

    Uses uniform weighting, the ``maxACC`` STM size option, a minimum STM
    size of 10 and a maximum window of 50 samples. Every ``wait_samples``-th
    sample (except the very first one) is predicted *before* the learner is
    trained on it, and the collected predictions are compared against a
    stored reference sequence.

    Parameters
    ----------
    package_path : str
        Root directory that contains ``src/skmultiflow/data/datasets``.
    """
    test_file = os.path.join(package_path, 'src/skmultiflow/data/datasets/sea_big.csv')
    stream = FileStream(test_file)
    stream.prepare_for_use()

    learner = SAMKNN(n_neighbors=3,
                     max_window_size=50,
                     weighting='uniform',
                     stm_size_option='maxACC',
                     min_stm_size=10,
                     use_ltm=True)

    max_samples = 1000
    wait_samples = 20
    predictions = array('i')

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples; the prediction is taken before the learner
        # has seen the sample it is being asked about.
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [1, 1, 1, 1, 1, 0, 0, 1, 1, 0,
                                       0, 1, 1, 0, 0, 1, 1, 1, 1, 1,
                                       1, 1, 1, 0, 0, 1, 0, 1, 0, 0,
                                       1, 1, 1, 1, 1, 1, 1, 0, 0, 1,
                                       0, 1, 0, 1, 1, 0, 1, 1, 1])

    # np.alltrue was a deprecated alias and is gone in NumPy 2.0; np.all is
    # the supported spelling. Comparing two array.array objects yields a
    # single bool, which np.all passes through unchanged.
    assert np.all(predictions == expected_predictions)
def evaluation():
    """Cross-validate a fixed set of stream classifiers and save a summary.

    The classifiers are evaluated on the concatenation of the standard,
    real-world and reoccurring stream collections provided by the
    ``CrossValidation`` instance.
    """
    # Classification algorithms that are to be tested.
    classifiers = [
        GLVQ(prototypes_per_class=4),
        HoeffdingTree(),
        HAT(),
        KNN(),
        SAMKNN(),
        LeverageBagging(),
        KNNAdwin(max_window_size=1000),
    ]

    cv = CrossValidation(clfs=classifiers, max_samples=1000000, test_size=1)

    # Initialise the stream generators of the scikit-multiflow package.
    standard_streams = cv.init_standard_streams()
    real_world_streams = cv.init_real_world()
    reoccurring_streams = cv.init_reoccuring_streams()
    cv.streams = standard_streams + real_world_streams + reoccurring_streams

    cv.test()
    cv.save_summary()
def init_classifiers():
    """Return the classifiers to evaluate together with their display names.

    Only SAMKNN is currently selected. The previous version also constructed
    several other models that were never returned or used; those dead
    instantiations have been dropped. To benchmark further models, append
    the instance to the first list and its label to the second, keeping both
    lists index-aligned.

    Returns
    -------
    tuple of (list, list of str)
        ``(clfs, names)`` where ``names[i]`` labels ``clfs[i]``.
    """
    clfs = [SAMKNN()]
    names = ["SamKnn"]
    return clfs, names
total_length = int(total_length) for data in response.iter_content(chunk_size=4096): dl += len(data) f.write(data) done = int(50 * dl / total_length) sys.stdout.write("\r[%s%s]" % ('=' * done, ' ' * (50 - done))) sys.stdout.flush() data = np.load(file_name, allow_pickle=True) return data # data = download_data() #If dataset file is already downloaded data = np.load(file_name, allow_pickle=True) sam = SAMKNN() arf = HoeffdingAdaptiveTreeClassifier() stream = DataStream(data[:, 1:], data[:, 0].astype(int)) stream.prepare_for_use() evaluator = EvaluatePrequential(max_samples=10000, max_time=1000, show_plot=True, metrics=['accuracy', 'kappa']) evaluator.evaluate(stream=stream, model=[sam, arf], model_names=['Sam', 'RSLVQ'])
def test_grid():
    """Cross-validate six classifiers on the reoccurring drift streams.

    The streams come from ``CrossValidation.init_reoccuring_streams``; a
    summary is saved once the run has finished.
    """
    # Both LVQ variants share the same prototype and sigma settings.
    lvq_settings = {'prototypes_per_class': 4, 'sigma': 8}

    clfs = [
        RRSLVQ(**lvq_settings),
        RSLVQ(**lvq_settings),
        HAT(),
        OzaBaggingAdwin(base_estimator=KNN()),
        AdaptiveRandomForest(),
        SAMKNN(),
    ]

    cv = CrossValidation(clfs=clfs, max_samples=1000000, test_size=1)
    cv.streams = cv.init_reoccuring_streams()

    cv.test()
    cv.save_summary()
    print("here")
stream.prepare_for_use() evluator = EvaluatePrequential(batch_size=10, max_samples=10000, show_plot=True, metrics=['accuracy']) evluator.evaluate(stream=stream, model=cls, model_names=detectors) arslvq = ARSLVQ(prototypes_per_class=n_prototypes_per_class, drift_detector="KS", confidence=0.05, sigma=sigma) oza = OzaBaggingAdwin(base_estimator=KNN()) adf = AdaptiveRandomForest() samknn = SAMKNN() hat = HAT() cls = [arslvq, oza, adf, samknn, hat] detectors = ["ARSLVQ", "OzaAdwin", "ADF", "SamKNN", "HAT"] s1 = MIXEDGenerator(classification_function=1, random_state=112, balance_classes=False) s2 = MIXEDGenerator(classification_function=0, random_state=112, balance_classes=False) stream = ReoccuringDriftStream(stream=s1, drift_stream=s2, random_state=None, alpha=90.0,