def test_hoeffding_tree_regressor_perceptron():
    """Check ``HoeffdingTreeRegressor`` with perceptron leaves against stored references.

    The stream is consumed one sample at a time. Every 10th sample (the very
    first one excluded) is predicted *before* the learner is trained on it.
    The collected predictions, their mean absolute error and the estimator's
    info string are then compared with the reference values below.
    """
    n_total = 500
    test_every = 10

    stream = RegressionGenerator(n_samples=500, n_features=20, n_informative=15, random_state=1)
    stream.prepare_for_use()
    learner = HoeffdingTreeRegressor(leaf_prediction='perceptron', random_state=1)

    y_pred = array('d')
    y_true = array('d')
    for step in range(n_total):
        X, y = stream.next_sample()
        # Test-then-train: score the sample before the model has seen it.
        if step != 0 and step % test_every == 0:
            y_pred.append(learner.predict(X)[0])
            y_true.append(y[0])
        learner.partial_fit(X, y)

    expected_predictions = array('d', [
        1198.4326121743168, 456.36607750881586, 927.9912160545144, 1160.4797981899128,
        506.50541829176535, -687.8187227095925, -677.8120094065415, 231.14888704761225,
        -284.46324039942937, -255.69195985557175, 47.58787439365423, -135.22494016284043,
        -10.351457437330152, 164.95903200643997, 360.72854984472383, 193.30633911830088,
        -64.23638301570358, 587.9771578214296, 649.8395655757931, 481.01214222804026,
        305.4402728117724, 266.2096493865043, -445.11447171009775, -567.5748694154349,
        -68.70070048021438, -446.79910655850153, -115.892348067663, -98.26862866231015,
        71.04707905920286, -10.239274802165584, 18.748731569441812, 4.971217265129857,
        172.2223575990573, -655.2864976783711, -129.69921313686626, -114.01187375876822,
        -405.66166686550963, -215.1264381928009, -345.91020370426247, -80.49330468453074,
        108.78958382083302, 134.95267043280126, -398.5273538477553, -157.1784910649728,
        219.72541225645654, -100.91598162899217, 80.9768574308987, -296.8856956382453,
        251.9332271253148,
    ])
    assert np.allclose(y_pred, expected_predictions)

    expected_error = 362.98595964244623
    error = mean_absolute_error(y_true, y_pred)
    assert np.isclose(error, expected_error)

    expected_info = (
        "HoeffdingTreeRegressor(binary_split=False, grace_period=200, leaf_prediction='perceptron', "
        "learning_ratio_const=True, learning_ratio_decay=0.001, learning_ratio_perceptron=0.02, "
        "max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0, no_preprune=False, "
        "nominal_attributes=None, random_state=1, remove_poor_atts=False, split_confidence=1e-07, "
        "stop_mem_management=False, tie_threshold=0.05)"
    )
    # Collapse every whitespace run in the reported info to a single blank.
    info = " ".join(learner.get_info().split())
    assert info == expected_info

    assert isinstance(learner.get_model_description(), str)
    assert type(learner.predict(X)) is np.ndarray
def test_evaluate_delayed_multi_target_regression_coverage(tmpdir):
    """Coverage run of ``EvaluatePrequentialDelayed`` on a 7-target regression stream.

    Nothing is asserted; the test only requires the evaluation to run to
    completion. The summary file is written below the ``tmpdir`` fixture.
    """
    from skmultiflow.data import RegressionGenerator
    from skmultiflow.trees import iSOUPTreeRegressor

    max_samples = 1000

    # Draw the complete data set from the generator in a single call.
    generator = RegressionGenerator(
        n_samples=max_samples, n_features=20, n_informative=15, random_state=1, n_targets=7
    )
    X, y = generator.next_sample(max_samples)
    timestamps = generate_random_dates(seed=1, samples=max_samples)

    # Wrap features, targets and the generated dates in a temporal stream.
    stream = TemporalDataStream(X, y, timestamps, ordered=False)

    model = iSOUPTreeRegressor(leaf_prediction='adaptive')

    output_file = os.path.join(str(tmpdir), "prequential_delayed_summary.csv")
    metrics = [
        'average_mean_square_error',
        'average_mean_absolute_error',
        'average_root_mean_square_error',
    ]
    evaluator = EvaluatePrequentialDelayed(
        max_samples=max_samples, metrics=metrics, output_file=output_file
    )
    evaluator.evaluate(stream=stream, model=model, model_names=['MTRHT'])
def test_isoup_tree_model_description():
    """Check that the textual model description of ``iSOUPTreeRegressor`` matches a reference.

    The tree is trained on 700 samples of a 3-target regression stream in one
    batch. The description is compared fuzzily (``SequenceMatcher`` ratio above
    0.9) rather than for exact equality.
    """
    stream = RegressionGenerator(
        n_samples=700, n_features=20, n_informative=15, random_state=1, n_targets=3
    )
    learner = iSOUPTreeRegressor(leaf_prediction='mean')

    max_samples = 700
    X, y = stream.next_sample(max_samples)

    # Trying to predict without fitting
    learner.predict(X[0])

    learner.partial_fit(X, y)

    # Parenthesised implicit concatenation: the former backslash continuations
    # left a dangling "\" after the last fragment, right before the assert.
    expected_description = (
        "if Attribute 11 <= 0.36737233297880056:\n"
        " Leaf = Statistics {0: 450.0000, 1: [-23322.8079, -30257.1616, -18740.9462], "
        "2: [22242706.1751, 29895648.2424, 18855571.7943]}\n"
        "if Attribute 11 > 0.36737233297880056:\n"
        " Leaf = Statistics {0: 250.0000, 1: [33354.8675, 32390.6094, 22886.4176], "
        "2: [15429435.6709, 17908472.4289, 10709746.1079]}\n"
    )

    similarity = SequenceMatcher(
        None, expected_description, learner.get_model_description()
    ).ratio()
    assert similarity > 0.9
def test_evaluate_delayed_regression_coverage(tmpdir):
    """Coverage run of ``EvaluatePrequentialDelayed`` with a ``HoeffdingTreeRegressor``.

    A simple coverage test. Tests for metrics are placed in the corresponding
    test module, so nothing is asserted here.
    """
    from skmultiflow.data import RegressionGenerator
    from skmultiflow.trees import HoeffdingTreeRegressor

    max_samples = 1000

    # Draw the complete data set from the generator in a single call.
    generator = RegressionGenerator(n_samples=max_samples)
    X, y = generator.next_sample(max_samples)
    timestamps = generate_random_dates(seed=1, samples=max_samples)

    # Wrap features, targets and the generated dates in a temporal stream.
    stream = TemporalDataStream(X, y, timestamps, ordered=False)

    model = HoeffdingTreeRegressor()

    output_file = os.path.join(str(tmpdir), "prequential_delayed_summary.csv")
    metrics = ['mean_square_error', 'mean_absolute_error']
    evaluator = EvaluatePrequentialDelayed(
        max_samples=max_samples, metrics=metrics, output_file=output_file
    )
    evaluator.evaluate(stream=stream, model=model, model_names=['HTR'])
def test_hoeffding_tree_regressor_coverage():
    """Exercise memory management and invalid-option paths of ``HoeffdingTreeRegressor``.

    Two trees are trained on the same 1000-sample batch with a 2 MB size
    budget; after each training run the measured object size must not exceed
    that budget.
    """
    max_samples = 1000
    max_size_mb = 2

    stream = RegressionGenerator(
        n_samples=max_samples, n_features=10, n_informative=7, n_targets=1, random_state=42
    )
    X, y = stream.next_sample(max_samples)

    # Settings shared by both trees under test.
    memory_settings = dict(
        grace_period=100,
        memory_estimate_period=100,
        max_byte_size=max_size_mb * 2 ** 20,
    )

    # Cover memory management
    tree = HoeffdingTreeRegressor(leaf_prediction='mean', **memory_settings)
    tree.partial_fit(X, y)
    # A tree without memory management enabled reaches over 3 MB in size
    assert calculate_object_size(tree, 'MB') <= max_size_mb

    # Typo in leaf prediction
    tree = HoeffdingTreeRegressor(leaf_prediction='percptron', **memory_settings)
    # Invalid split_criterion
    tree.split_criterion = 'VR'
    tree.partial_fit(X, y)
    assert calculate_object_size(tree, 'MB') <= max_size_mb

    tree.reset()
    assert tree._estimator_type == 'regressor'
def test_hoeffding_tree():
    """Check ``HoeffdingAdaptiveTreeRegressor`` with mean leaves against stored references.

    The stream is consumed one sample at a time. Every 10th sample (the very
    first one excluded) is predicted *before* the learner is trained on it.
    The collected predictions, their mean absolute error and the estimator's
    info string are then compared with the reference values below.
    """
    n_total = 500
    test_every = 10

    stream = RegressionGenerator(n_samples=500, n_features=20, n_informative=15, random_state=1)
    stream.prepare_for_use()
    learner = HoeffdingAdaptiveTreeRegressor(leaf_prediction='mean', random_state=1)

    y_pred = array('d')
    y_true = array('d')
    for step in range(n_total):
        X, y = stream.next_sample()
        # Test-then-train: score the sample before the model has seen it.
        if step != 0 and step % test_every == 0:
            y_pred.append(learner.predict(X)[0])
            y_true.append(y[0])
        learner.partial_fit(X, y)

    expected_predictions = array('d', [
        102.38946041769101, 55.6584574987656, 5.746076599168373, 17.11797209372667,
        2.566888222752787, 9.188247802192826, 17.87894804676911, 15.940629626883966,
        8.981172175448485, 13.152624115190092, 11.106058099429399, 6.473195313058236,
        4.723621479590173, 13.825568609556493, 8.698873073880696, 1.6452441811010252,
        5.123496188584294, 6.34387187194982, 5.9977733790395105, 6.874251577667707,
        4.605348088338317, 8.20112636572672, 9.032631648758098, 4.428189978974459,
        4.249801041367518, 9.983272668044492, 12.859518508979734, 11.741395774380285,
        11.230028410261868, 9.126921979081521, 9.132146661688296, 7.750655625124709,
        6.445145118245414, 5.760928671876355, 4.041291302080659, 3.591837600560529,
        0.7640424010500604, 0.1738639840537784, 2.2068337802212286, -81.05302946841077,
        96.17757415335177, -77.35894903819677, 95.85568683733698, 99.1981674250886,
        99.89327888035015, 101.66673013734784, -79.1904234513751, -80.42952143783687,
        100.63954789983896,
    ])
    assert np.allclose(y_pred, expected_predictions)

    expected_error = 143.11351404083086
    error = mean_absolute_error(y_true, y_pred)
    assert np.isclose(error, expected_error)

    expected_info = (
        "HoeffdingAdaptiveTreeRegressor(binary_split=False, grace_period=200, leaf_prediction='mean', "
        "learning_ratio_const=True, learning_ratio_decay=0.001, learning_ratio_perceptron=0.02, "
        "max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0, no_preprune=False, "
        "nominal_attributes=None, random_state=1, remove_poor_atts=False, split_confidence=1e-07, "
        "stop_mem_management=False, tie_threshold=0.05)"
    )
    # Collapse every whitespace run in the reported info to a single blank.
    info = " ".join(learner.get_info().split())
    assert info == expected_info

    assert isinstance(learner.get_model_description(), str)
    assert type(learner.predict(X)) is np.ndarray
def test_hoeffding_tree_coverage():
    """Exercise memory management of ``StackedSingleTargetHoeffdingTreeRegressor``.

    Two trees are trained on the same 1000-sample, 3-target batch with a 2 MB
    size budget; after each training run the measured object size must not
    exceed that budget.
    """
    max_samples = 1000
    max_size_mb = 2

    stream = RegressionGenerator(
        n_samples=max_samples, n_features=10, n_informative=7, n_targets=3, random_state=42
    )
    X, y = stream.next_sample(max_samples)

    # Settings shared by both trees under test.
    memory_settings = dict(
        grace_period=200,
        memory_estimate_period=100,
        max_byte_size=max_size_mb * 2 ** 20,
    )

    # Will generate a warning concerning the invalid leaf prediction option
    tree = StackedSingleTargetHoeffdingTreeRegressor(leaf_prediction='mean', **memory_settings)

    # Trying to predict without fitting
    tree.predict(X[0])

    tree.partial_fit(X, y)
    # A tree without memory management enabled reaches over 3 MB in size
    assert calculate_object_size(tree, 'MB') <= max_size_mb

    tree = StackedSingleTargetHoeffdingTreeRegressor(
        leaf_prediction='adaptive', learning_ratio_const=False, **memory_settings
    )
    tree.partial_fit(X, y)
    assert calculate_object_size(tree, 'MB') <= max_size_mb
def test_hoeffding_tree_regressor_perceptron():
    """Check ``HoeffdingTreeRegressor`` with perceptron leaves against stored references.

    The stream is consumed one sample at a time. Every 10th sample (the very
    first one excluded) is predicted *before* the learner is trained on it.
    Only the mean absolute error and the info string are asserted; the
    element-wise prediction check is disabled.
    """
    n_total = 500
    test_every = 10

    stream = RegressionGenerator(n_samples=500, n_features=20, n_informative=15, random_state=1)
    learner = HoeffdingTreeRegressor(leaf_prediction='perceptron', random_state=1)

    y_pred = array('d')
    y_true = array('d')
    for step in range(n_total):
        X, y = stream.next_sample()
        # Test-then-train: score the sample before the model has seen it.
        if step != 0 and step % test_every == 0:
            y_pred.append(learner.predict(X)[0])
            y_true.append(y[0])
        learner.partial_fit(X, y)

    expected_predictions = array('d', [
        525.7553636732247, 352.8160300365902, 224.80744320456478, 193.72837054292074,
        132.6059603765031, 117.06974933197759, 114.53342429855932, 89.37195405567235,
        57.85335051891305, 60.00883955911155, 47.263185779784266, 25.17616431074491,
        17.43259526890146, 47.33468996498019, 22.83975208548138, -7.659282840823236,
        8.564101665071064, 14.61585289361161, 11.560941733770441, 13.70120291865976,
        1.1938438210799651, 19.01970713481836, 21.23459424444584, -5.667473522309328,
        -5.203149619381393, 28.726275200889173, 41.03406433337882, 27.950322712127267,
        21.267116786963925, 5.53344652490152, 6.753264259267268, -2.3288137435962213,
        -10.492766334689875, -11.19641058176631, -20.134685945295644, -19.36581990084085,
        -38.26894947177957, -34.90246284430353, -11.019543212232008, -22.016714766708127,
        -18.710456277443544, -20.5568019328217, -2.636583876625667, 24.787714491718187,
        29.325261678088406, 45.31267371823666, -48.271054430207776, -59.7649172085901,
        48.22724814037523,
    ])
    # NOTE(review): the element-wise comparison is switched off; the reference
    # values above are currently unused. Confirm whether it can be re-enabled.
    # assert np.allclose(y_pred, expected_predictions)

    expected_error = 152.12931270533377
    error = mean_absolute_error(y_true, y_pred)
    assert np.isclose(error, expected_error)

    expected_info = (
        "HoeffdingTreeRegressor(binary_split=False, grace_period=200, leaf_prediction='perceptron', "
        "learning_ratio_const=True, learning_ratio_decay=0.001, learning_ratio_perceptron=0.02, "
        "max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0, no_preprune=False, "
        "nominal_attributes=None, random_state=1, remove_poor_atts=False, split_confidence=1e-07, "
        "stop_mem_management=False, tie_threshold=0.05)"
    )
    # Collapse every whitespace run in the reported info to a single blank.
    info = " ".join(learner.get_info().split())
    assert info == expected_info

    assert isinstance(learner.get_model_description(), str)
    assert type(learner.predict(X)) is np.ndarray
def test_multi_output_learner_regressor():
    """Check ``MultiOutputLearner`` wrapping an ``SGDRegressor`` on a 2-target stream.

    The learner is warmed up on 150 samples, then 5000 further samples are
    processed one at a time. Every 100th sample (the first excluded) is
    predicted before it is learned, and the mean absolute error over those
    predictions is compared with a stored reference value. The test also
    checks the estimator type, the prediction container type, and that
    ``predict_proba`` raises ``AttributeError``.
    """
    stream = RegressionGenerator(
        n_samples=5500, n_features=10, n_informative=20, n_targets=2, random_state=1
    )
    stream.prepare_for_use()

    # NOTE(review): 'squared_loss' was renamed 'squared_error' in newer
    # scikit-learn releases — confirm against the supported version.
    estimator = SGDRegressor(random_state=112, tol=1e-3, max_iter=10, loss='squared_loss')
    learner = MultiOutputLearner(base_estimator=estimator)

    # Warm-up batch so the wrapped estimators are fitted before the first prediction.
    X, y = stream.next_sample(150)
    learner.partial_fit(X, y)

    max_samples = 5000
    wait_samples = 100
    predictions = []
    true_targets = []

    # The former ``correct_predictions`` counter was removed: it was never read,
    # and exact equality is not a meaningful measure for regression outputs.
    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            true_targets.append(y[0])
        learner.partial_fit(X, y)

    expected_performance = 2.444365309339395
    performance = mean_absolute_error(true_targets, predictions)
    assert np.isclose(performance, expected_performance)

    assert learner._estimator_type == "regressor"
    assert isinstance(learner.predict(X), np.ndarray)

    with pytest.raises(AttributeError):
        learner.predict_proba(X)
def test_hoeffding_adaptive_tree_regressor_perceptron():
    """Check ``HoeffdingAdaptiveTreeRegressor`` with perceptron leaves against references.

    The stream is consumed one sample at a time. Every 10th sample (the very
    first one excluded) is predicted *before* the learner is trained on it.
    The collected predictions, their mean absolute error, the estimator's
    info string and its estimator type are then checked.
    """
    n_total = 500
    test_every = 10

    stream = RegressionGenerator(n_samples=500, n_features=20, n_informative=15, random_state=1)
    learner = HoeffdingAdaptiveTreeRegressor(leaf_prediction='perceptron', random_state=1)

    y_pred = array('d')
    y_true = array('d')
    for step in range(n_total):
        X, y = stream.next_sample()
        # Test-then-train: score the sample before the model has seen it.
        if step != 0 and step % test_every == 0:
            y_pred.append(learner.predict(X)[0])
            y_true.append(y[0])
        learner.partial_fit(X, y)

    expected_predictions = array('d', [
        207.20901655684412, 106.30316877540555, 101.46950096324191, 114.38162776688861,
        48.40271620592212, -79.94375846313639, -76.69182794940929, 88.38425569670662,
        -13.92372162581644, 3.0549887923350507, 55.36276732455883, 32.0512081208464,
        17.54953203218902, -1.7305966738232161, 43.54548690756897, 8.502241407478213,
        -61.14739038895263, 50.528736810827745, 9.679668917948607, 89.93098085572623,
        85.1994809437223, 1.8721866382932664, -7.1972581323107825, -45.86230662663542,
        3.111671172363243, 57.921908276916646, 61.43400576850072, -16.61695641848216,
        -6.0769944259948065, 19.929266442289546, -60.972801351912224, -0.3342549973033524,
        -50.53334350658139, -14.885488543743078, -13.255920225124637, 28.909916365484275,
        -103.03499425386107, -36.44921969674884, -15.40018796932204, -84.98471039676006,
        38.270205984888065, -62.97228157481581, -48.095864628804044, 95.5028130171316,
        73.62390886812497, 152.7135140597221, -120.4662342226783, -77.68182541723442,
        66.82059046110074,
    ])
    assert np.allclose(y_pred, expected_predictions)

    expected_error = 126.11208652969131
    error = mean_absolute_error(y_true, y_pred)
    assert np.isclose(error, expected_error)

    expected_info = (
        "HoeffdingAdaptiveTreeRegressor(binary_split=False, grace_period=200, "
        "leaf_prediction='perceptron', learning_ratio_const=True, learning_ratio_decay=0.001, "
        "learning_ratio_perceptron=0.02, max_byte_size=33554432, memory_estimate_period=1000000, "
        "no_preprune=False, nominal_attributes=None, random_state=1, "
        "remove_poor_atts=False, split_confidence=1e-07, stop_mem_management=False, tie_threshold=0.05)"
    )
    # Collapse every whitespace run in the reported info to a single blank.
    info = " ".join(learner.get_info().split())
    assert info == expected_info

    assert isinstance(learner.get_model_description(), str)
    assert type(learner.predict(X)) is np.ndarray
    assert learner._estimator_type == 'regressor'
def test_stacked_single_target_hoeffding_tree_regressor_adaptive(test_path):
    """Check ``StackedSingleTargetHoeffdingTreeRegressor`` with adaptive leaves.

    The 3-target stream is consumed one sample at a time. Every 200th sample
    (the first excluded) is predicted before it is learned and stored in the
    row ``cnt // 200``; row 0 therefore stays all-zero in both arrays. The
    predictions are compared with a stored ``.npy`` file below ``test_path``,
    and the mean absolute error and info string with reference values.

    Parameters
    ----------
    test_path
        Directory holding the stored reference predictions (pytest fixture).
    """
    max_samples = 2000
    wait_samples = 200
    n_rows = int(max_samples / wait_samples)

    stream = RegressionGenerator(
        n_samples=2000, n_features=20, n_informative=15, random_state=1, n_targets=3
    )
    learner = StackedSingleTargetHoeffdingTreeRegressor(
        leaf_prediction='adaptive', random_state=1
    )

    y_pred = np.zeros((n_rows, 3))
    y_true = np.zeros((n_rows, 3))

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples
        if cnt != 0 and cnt % wait_samples == 0:
            row = int(cnt / wait_samples)
            y_pred[row, :] = learner.predict(X)
            y_true[row, :] = y
        learner.partial_fit(X, y)

    test_file = os.path.join(
        test_path, 'expected_preds_stacked_single_target_hoeffding_tree_adaptive.npy'
    )
    expected_predictions = np.load(test_file)
    assert np.allclose(y_pred, expected_predictions)

    expected_error = 152.8716829154756
    error = mean_absolute_error(y_true, y_pred)
    assert np.isclose(error, expected_error)

    expected_info = (
        "StackedSingleTargetHoeffdingTreeRegressor(binary_split=False, grace_period=200,\n"
        " leaf_prediction='adaptive',\n"
        " learning_ratio_const=True,\n"
        " learning_ratio_decay=0.001,\n"
        " learning_ratio_perceptron=0.02,\n"
        " max_byte_size=33554432,\n"
        " memory_estimate_period=1000000,\n"
        " nb_threshold=0, no_preprune=False,\n"
        " nominal_attributes=None,\n"
        " random_state=1,\n"
        " remove_poor_atts=False,\n"
        " split_confidence=1e-07,\n"
        " stop_mem_management=False,\n"
        " tie_threshold=0.05)"
    )
    # Compare whitespace-insensitively so the check pins the reported parameter
    # values rather than the exact line wrapping / indentation of the report.
    info = " ".join(learner.get_info().split())
    assert info == " ".join(expected_info.split())

    assert isinstance(learner.get_model_description(), str)
def test_hoeffding_tree_regressor_perceptron():
    """Check ``HoeffdingTreeRegressor`` with perceptron leaves against stored references.

    The stream is consumed one sample at a time. Every 10th sample (the very
    first one excluded) is predicted *before* the learner is trained on it.
    The collected predictions, their mean absolute error and the estimator's
    info string are then compared with the reference values below.
    """
    n_total = 500
    test_every = 10

    stream = RegressionGenerator(n_samples=500, n_features=20, n_informative=15, random_state=1)
    learner = HoeffdingTreeRegressor(leaf_prediction='perceptron', random_state=1)

    y_pred = array('d')
    y_true = array('d')
    for step in range(n_total):
        X, y = stream.next_sample()
        # Test-then-train: score the sample before the model has seen it.
        if step != 0 and step % test_every == 0:
            y_pred.append(learner.predict(X)[0])
            y_true.append(y[0])
        learner.partial_fit(X, y)

    expected_predictions = array('d', [
        -106.84237763060068, -10.965517384802226, -180.90711470797237, -218.20896751607663,
        -96.4271589961865, 110.51551963099622, 108.34616947202511, 30.1720109214627,
        57.92205878998479, 77.82418885914053, 49.972060923364765, 68.56117081695875,
        15.996949915551697, -34.22744443808294, -19.762696110319702, -28.447329394752995,
        -50.62864370485592, -47.37357781048561, -99.82613515424342, 13.985531117918336,
        41.41709671929987, -34.679807275938174, 62.75626094547859, 30.925078688018893,
        12.130320819235365, 119.3648998377624, 82.96422756064737, -6.920397563039609,
        -12.701774870569059, 24.883730398016034, -74.22855883237567, -0.8012436194087567,
        -83.03683748750394, 46.737839617687854, 0.537404558240671, 48.53591837633138,
        -86.2259777783834, -24.985514024179967, 6.396035456152859, -90.19454995571908,
        32.05821807667601, -83.08553684151566, -28.32223999320023, 113.28916673506842,
        68.10498750807977, 173.9146410394573, -150.2067507947196, -74.10346402222962,
        54.39153137687993,
    ])
    assert np.allclose(y_pred, expected_predictions)

    expected_error = 115.78916175164417
    error = mean_absolute_error(y_true, y_pred)
    assert np.isclose(error, expected_error)

    expected_info = (
        "HoeffdingTreeRegressor(binary_split=False, grace_period=200, leaf_prediction='perceptron', "
        "learning_ratio_const=True, learning_ratio_decay=0.001, learning_ratio_perceptron=0.02, "
        "max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0, no_preprune=False, "
        "nominal_attributes=None, random_state=1, remove_poor_atts=False, split_confidence=1e-07, "
        "stop_mem_management=False, tie_threshold=0.05)"
    )
    # Collapse every whitespace run in the reported info to a single blank.
    info = " ".join(learner.get_info().split())
    assert info == expected_info

    assert isinstance(learner.get_model_description(), str)
    assert type(learner.predict(X)) is np.ndarray
def test_multi_target_regression_hoeffding_tree_mean(test_path):
    """Check ``MultiTargetRegressionHoeffdingTree`` with mean leaves against references.

    The 3-target stream is consumed one sample at a time. Every 10th sample
    (the first excluded) is predicted before it is learned and stored in the
    row ``step // 10``; row 0 therefore stays all-zero in both arrays. The
    predictions are compared with a stored ``.npy`` file below ``test_path``,
    and the mean absolute error and info string with reference values.
    """
    max_samples = 500
    wait_samples = 10
    n_rows = max_samples // wait_samples

    stream = RegressionGenerator(
        n_samples=500, n_features=20, n_informative=15, random_state=1, n_targets=3
    )
    stream.prepare_for_use()
    learner = MultiTargetRegressionHoeffdingTree(leaf_prediction='mean')

    y_pred = np.zeros((n_rows, 3))
    y_true = np.zeros((n_rows, 3))

    for step in range(max_samples):
        X, y = stream.next_sample()
        # Test-then-train: score the sample before the model has seen it.
        if step != 0 and step % wait_samples == 0:
            row = step // wait_samples
            y_pred[row, :] = learner.predict(X)
            y_true[row, :] = y
        learner.partial_fit(X, y)

    test_file = os.path.join(test_path, 'expected_preds_multi_target_regression_mean.npy')
    expected_predictions = np.load(test_file)
    assert np.allclose(y_pred, expected_predictions)

    expected_error = 167.40626294018753
    error = mean_absolute_error(y_true, y_pred)
    assert np.isclose(error, expected_error)

    expected_info = (
        'MultiTargetRegressionHoeffdingTree: max_byte_size: 33554432 - '
        'memory_estimate_period: 1000000 - grace_period: 200 - '
        'split_criterion: intra cluster variance reduction - '
        'split_confidence: 1e-07 - tie_threshold: 0.05 - binary_split: False'
        ' - stop_mem_management: False - remove_poor_atts: False '
        '- no_pre_prune: False - leaf_prediction: mean - nb_threshold: 0 - '
        'nominal_attributes: [] - '
    )
    assert learner.get_info() == expected_info

    assert isinstance(learner.get_model_description(), str)
def test_isoup_tree_mean(test_path):
    """Check ``iSOUPTreeRegressor`` with mean leaves against stored references.

    The 3-target stream is consumed one sample at a time. Every 200th sample
    (the first excluded) is predicted before it is learned and stored in the
    row ``step // 200``; row 0 therefore stays all-zero in both arrays. The
    predictions are compared with a stored ``.npy`` file below ``test_path``,
    and the mean absolute error and info string with reference values.
    """
    max_samples = 2000
    wait_samples = 200
    n_rows = max_samples // wait_samples

    stream = RegressionGenerator(
        n_samples=2000, n_features=20, n_informative=15, random_state=1, n_targets=3
    )
    stream.prepare_for_use()
    learner = iSOUPTreeRegressor(leaf_prediction='mean')

    y_pred = np.zeros((n_rows, 3))
    y_true = np.zeros((n_rows, 3))

    for step in range(max_samples):
        X, y = stream.next_sample()
        # Test-then-train: score the sample before the model has seen it.
        if step != 0 and step % wait_samples == 0:
            row = step // wait_samples
            y_pred[row, :] = learner.predict(X)
            y_true[row, :] = y
        learner.partial_fit(X, y)

    test_file = os.path.join(test_path, 'expected_preds_multi_target_regression_mean.npy')
    expected_predictions = np.load(test_file)
    assert np.allclose(y_pred, expected_predictions)

    expected_error = 191.2823924547882
    error = mean_absolute_error(y_true, y_pred)
    assert np.isclose(error, expected_error)

    expected_info = (
        "iSOUPTreeRegressor(binary_split=False, grace_period=200, leaf_prediction='mean', "
        "learning_ratio_const=True, learning_ratio_decay=0.001, learning_ratio_perceptron=0.02, "
        "max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0, no_preprune=False, "
        "nominal_attributes=None, random_state=None, remove_poor_atts=False, split_confidence=1e-07, "
        "stop_mem_management=False, tie_threshold=0.05)"
    )
    # Collapse every whitespace run in the reported info to a single blank.
    info = " ".join(learner.get_info().split())
    assert info == expected_info

    assert type(learner.predict(X)) is np.ndarray
def test_hoeffding_tree_regressor_model_description():
    """Check that the textual model description of ``HoeffdingTreeRegressor`` matches a reference.

    The tree is trained on 500 samples in one batch. The description is
    compared fuzzily (``SequenceMatcher`` ratio above 0.9) rather than for
    exact equality.
    """
    max_samples = 500

    stream = RegressionGenerator(n_samples=500, n_features=20, n_informative=15, random_state=1)
    learner = HoeffdingTreeRegressor(leaf_prediction='mean')

    X, y = stream.next_sample(max_samples)
    learner.partial_fit(X, y)

    expected_description = "".join([
        "if Attribute 6 <= 0.1394515530995348:\n",
        " Leaf = Statistics {0: 276.0000, 1: -21537.4157, 2: 11399392.2187}\n",
        "if Attribute 6 > 0.1394515530995348:\n",
        " Leaf = Statistics {0: 224.0000, 1: 22964.8868, 2: 10433581.2534}\n",
    ])

    matcher = SequenceMatcher(None, expected_description, learner.get_model_description())
    assert matcher.ratio() > 0.9
def test_isoup_tree_coverage():
    """Exercise memory management and invalid-option paths of ``iSOUPTreeRegressor``.

    Three trees are trained on the same 1000-sample, 3-target batch with a
    2 MB size budget; after each training run the measured object size must
    not exceed that budget.
    """
    max_samples = 1000
    max_size_mb = 2

    stream = RegressionGenerator(
        n_samples=max_samples, n_features=10, n_informative=7, n_targets=3, random_state=42
    )

    # Settings shared by all trees under test.
    memory_settings = dict(
        grace_period=200,
        memory_estimate_period=100,
        max_byte_size=max_size_mb * 2 ** 20,
    )

    # Cover memory management
    tree = iSOUPTreeRegressor(leaf_prediction='mean', **memory_settings)
    # Invalid split_criterion
    tree.split_criterion = 'ICVR'

    X, y = stream.next_sample(max_samples)
    tree.partial_fit(X, y)
    # A tree without memory management enabled reaches over 3 MB in size
    assert calculate_object_size(tree, 'MB') <= max_size_mb

    # Memory management in a tree with perceptron leaves (purposeful typo in leaf_prediction)
    tree = iSOUPTreeRegressor(leaf_prediction='PERCEPTRON', **memory_settings)
    tree.partial_fit(X, y)
    assert calculate_object_size(tree, 'MB') <= max_size_mb

    # Memory management in a tree with adaptive leaves
    tree = iSOUPTreeRegressor(leaf_prediction='adaptive', **memory_settings)
    tree.partial_fit(X, y)
    assert calculate_object_size(tree, 'MB') <= max_size_mb
def test_adaptive_random_forest_regressor_perceptron():
    """Check three ``AdaptiveRandomForestRegressor`` configurations with perceptron leaves.

    All three forests see the same stream one sample at a time. Every 10th
    sample (the first excluded) is predicted by each forest before any of them
    learns it. Each forest's mean absolute error, rounded to two decimals, is
    compared with a reference value. Finally the first forest is reset and the
    info string of the second forest is checked.
    """
    n_total = 500
    test_every = 10

    stream = RegressionGenerator(n_samples=500, n_features=20, n_informative=15, random_state=1)

    # Arguments common to all three forests.
    shared = dict(
        n_estimators=3,
        leaf_prediction='perceptron',
        weighted_vote_strategy=None,
        max_byte_size=float('Inf'),
        random_state=1,
    )
    learner1 = AdaptiveRandomForestRegressor(
        max_features='log2', aggregation_method='mean', **shared
    )
    learner2 = AdaptiveRandomForestRegressor(
        max_features='auto', aggregation_method='median', **shared
    )
    learner3 = AdaptiveRandomForestRegressor(
        max_features=4, aggregation_method='mean', learning_ratio_const=False, **shared
    )
    learners = (learner1, learner2, learner3)

    per_learner_preds = (array('d'), array('d'), array('d'))
    y_true = array('d')

    for step in range(n_total):
        X, y = stream.next_sample()
        # Test-then-train: score the sample before any model has seen it.
        if step != 0 and step % test_every == 0:
            for model, preds in zip(learners, per_learner_preds):
                preds.append(model.predict(X)[0])
            y_true.append(y[0])
        for model in learners:
            model.partial_fit(X, y)

    errors = [mean_absolute_error(y_true, preds) for preds in per_learner_preds]
    expected_errors = [118.69, 121.56, 117.96]
    for error, expected_error in zip(errors, expected_errors):
        assert np.isclose(round(error, 2), expected_error)

    learner1.reset()

    expected_info = (
        "AdaptiveRandomForestRegressor(aggregation_method='median', "
        "binary_split=False, drift_detection_criteria='mse', "
        "drift_detection_method=ADWIN(delta=0.001), grace_period=50, "
        "lambda_value=6, leaf_prediction='perceptron', learning_ratio_const=True, "
        "learning_ratio_decay=0.001, learning_ratio_perceptron=0.1, "
        "max_byte_size=inf, max_features=4, memory_estimate_period=2000000, "
        "n_estimators=3, no_preprune=False, nominal_attributes=None, "
        "random_state=1, remove_poor_atts=False, split_confidence=0.01, "
        "stop_mem_management=False, tie_threshold=0.05, "
        "warning_detection_method=ADWIN(delta=0.01), weighted_vote_strategy=None)"
    )
    # Collapse every whitespace run in the reported info to a single blank.
    info = " ".join(learner2.get_info().split())
    assert info == expected_info