def test_hat_mc(test_path):
    """Check HAT with majority-class leaves on a drifting SEA stream.

    The learner is trained one sample at a time on a ConceptDriftStream
    built from two SEAGenerator streams.  Every ``wait_samples`` samples
    (skipping sample 0) the prediction and class probabilities for the
    incoming sample are recorded *before* the learner sees its label, and
    the recorded values are compared with stored expectations.

    Parameters
    ----------
    test_path
        Directory that contains ``test_hoeffding_adaptive_tree_mc.npy``,
        the reference probabilities loaded with ``np.load``.
    """
    stream = ConceptDriftStream(
        stream=SEAGenerator(random_state=1, noise_percentage=0.05),
        drift_stream=SEAGenerator(random_state=2,
                                  classification_function=2,
                                  noise_percentage=0.05),
        random_state=1, position=250, width=10)
    stream.prepare_for_use()

    learner = HAT(leaf_prediction='mc')

    max_samples = 1000
    wait_samples = 20
    y_pred = array('i')
    y_proba = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples (predict first, then train on the sample).
        if cnt != 0 and cnt % wait_samples == 0:
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1
    ])
    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'test_hoeffding_adaptive_tree_mc.npy')
    data = np.load(test_file)
    assert np.allclose(y_proba, data)

    # NOTE(review): the whitespace at the start of each continuation
    # fragment must match get_info() output exactly -- confirm.
    expected_info = (
        "HAT(binary_split=False, grace_period=200, leaf_prediction='mc',\n"
        " max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0,\n"
        " no_preprune=False, nominal_attributes=None, remove_poor_atts=False,\n"
        " split_confidence=1e-07, split_criterion='info_gain',\n"
        " stop_mem_management=False, tie_threshold=0.05)"
    )
    assert learner.get_info() == expected_info

    expected_model_1 = 'Leaf = Class 1.0 | {0.0: 398.0, 1.0: 1000.0}\n'
    assert learner.get_model_description() == expected_model_1

    # X is the last sample drawn in the loop above.
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)

    # Fit a fresh learner with a very small max_byte_size on one
    # 5000-sample batch.  Nothing is asserted afterwards, so this only
    # checks that partial_fit completes without raising.
    stream.restart()
    X, y = stream.next_sample(5000)
    learner = HAT(max_byte_size=30, leaf_prediction='mc', grace_period=10)
    learner.partial_fit(X, y)
def test_hat_nb(test_path):
    """Check HAT with naive-Bayes leaves on a drifting SEA stream.

    The learner is trained one sample at a time on a ConceptDriftStream
    built from two SEAGenerator streams.  Every ``wait_samples`` samples
    (skipping sample 0) the prediction and class probabilities for the
    incoming sample are recorded before training on it, then compared
    with stored expectations.

    Parameters
    ----------
    test_path
        Directory that contains ``test_hoeffding_adaptive_tree_nb.npy``,
        the reference probabilities loaded with ``np.load``.
    """
    stream = ConceptDriftStream(
        stream=SEAGenerator(random_state=1, noise_percentage=0.05),
        drift_stream=SEAGenerator(random_state=2,
                                  classification_function=2,
                                  noise_percentage=0.05),
        random_state=1, position=250, width=10)
    stream.prepare_for_use()

    learner = HAT(leaf_prediction='nb')

    max_samples = 1000
    wait_samples = 20
    y_pred = array('i')
    y_proba = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples (predict first, then train on the sample).
        if cnt != 0 and cnt % wait_samples == 0:
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [
        1, 0, 1, 1, 1, 1, 0, 1, 1, 1,
        0, 1, 1, 1, 1, 1, 0, 1, 0, 1,
        1, 1, 1, 0, 1, 0, 0, 1, 1, 0,
        1, 1, 1, 1, 1, 0, 1, 0, 1, 1,
        0, 1, 1, 1, 1, 1, 0, 1, 1
    ])
    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'test_hoeffding_adaptive_tree_nb.npy')
    data = np.load(test_file)
    assert np.allclose(y_proba, data)

    expected_info = (
        'HAT: max_byte_size: 33554432 - memory_estimate_period: 1000000 - grace_period: 200'
        ' - split_criterion: info_gain - split_confidence: 1e-07 - tie_threshold: 0.05'
        ' - binary_split: False - stop_mem_management: False - remove_poor_atts: False'
        ' - no_pre_prune: False - leaf_prediction: nb - nb_threshold: 0'
        ' - nominal_attributes: [] - '
    )
    assert learner.get_info() == expected_info

    # X is the last sample drawn in the loop above.
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)
def test_hat_mc(test_path):
    """Check HAT with majority-class leaves on a drifting SEA stream.

    The learner is trained one sample at a time on a ConceptDriftStream
    built from two SEAGenerator streams.  Every ``wait_samples`` samples
    (skipping sample 0) the prediction and class probabilities for the
    incoming sample are recorded before training on it, then compared
    with stored expectations.

    Parameters
    ----------
    test_path
        Directory that contains ``test_hoeffding_adaptive_tree_mc.npy``,
        the reference probabilities loaded with ``np.load``.
    """
    # NOTE(review): a function with this same name is defined earlier in
    # this file; the later definition replaces the earlier one at import
    # time, so only one of them can be collected -- confirm which is wanted.
    stream = ConceptDriftStream(
        stream=SEAGenerator(random_state=1, noise_percentage=0.05),
        drift_stream=SEAGenerator(random_state=2,
                                  classification_function=2,
                                  noise_percentage=0.05),
        random_state=1, position=250, width=10)
    stream.prepare_for_use()

    learner = HAT(leaf_prediction='mc')

    max_samples = 1000
    wait_samples = 20
    y_pred = array('i')
    y_proba = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples (predict first, then train on the sample).
        if cnt != 0 and cnt % wait_samples == 0:
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [
        1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
        0, 1, 1, 1, 1, 0, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
        1, 1, 1, 0, 1, 0, 1, 0, 1
    ])
    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'test_hoeffding_adaptive_tree_mc.npy')
    data = np.load(test_file)
    assert np.allclose(y_proba, data)

    expected_info = (
        'HAT: max_byte_size: 33554432 - memory_estimate_period: 1000000 - grace_period: 200'
        ' - split_criterion: info_gain - split_confidence: 1e-07 - tie_threshold: 0.05'
        ' - binary_split: False - stop_mem_management: False - remove_poor_atts: False'
        ' - no_pre_prune: False - leaf_prediction: mc - nb_threshold: 0'
        ' - nominal_attributes: [] - '
    )
    assert learner.get_info() == expected_info

    # Several spellings of the same description are accepted: they differ
    # in the last digits of one float and in the order of the dict keys.
    expected_models = (
        'Leaf = Class 1.0 | {0.0: 0.005295278636481529, 1.0: 1.9947047213635185}\n',
        'Leaf = Class 1.0 | {0.0: 0.0052952786364815294, 1.0: 1.9947047213635185}\n',
        'Leaf = Class 1.0 | {1.0: 1.9947047213635185, 0.0: 0.0052952786364815294}\n',
    )
    # Build the description once instead of once per alternative.
    assert learner.get_model_description() in expected_models

    # Fit a fresh learner with a very small max_byte_size on one
    # 5000-sample batch.  Nothing is asserted afterwards, so this only
    # checks that partial_fit completes without raising.
    stream.restart()
    X, y = stream.next_sample(5000)
    learner = HAT(max_byte_size=30, leaf_prediction='mc', grace_period=10)
    learner.partial_fit(X, y)
def test_HAT(test_path):
    """Check HAT with default settings on a RandomTreeGenerator stream.

    Features with index 5 and above are passed to the learner as nominal
    attributes.  The learner is trained one sample at a time; every
    ``wait_samples`` samples (skipping sample 0) the prediction and class
    probabilities for the incoming sample are recorded before training
    on it, then compared with stored expectations.

    Parameters
    ----------
    test_path
        Directory that contains ``test_hoeffding_adaptive_tree.npy``,
        the reference probabilities loaded with ``np.load``.
    """
    stream = RandomTreeGenerator(tree_random_state=23, sample_random_state=12,
                                 n_classes=4, n_cat_features=2,
                                 n_num_features=5,
                                 n_categories_per_cat_feature=5,
                                 max_tree_depth=6, min_leaf_depth=3,
                                 fraction_leaves_per_level=0.15)
    stream.prepare_for_use()

    nominal_attr_idx = list(range(5, stream.n_features))
    learner = HAT(nominal_attributes=nominal_attr_idx)

    max_samples = 5000
    wait_samples = 100
    predictions = array('d')
    proba_predictions = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples (predict first, then train on the sample).
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            proba_predictions.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('d', [
        2.0, 1.0, 1.0, 1.0, 0.0, 3.0, 0.0, 1.0, 1.0, 2.0,
        0.0, 2.0, 1.0, 1.0, 2.0, 1.0, 3.0, 0.0, 1.0, 1.0,
        1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 1.0, 1.0, 3.0, 2.0,
        1.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0,
        0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0
    ])

    test_file = os.path.join(test_path, 'test_hoeffding_adaptive_tree.npy')
    data = np.load(test_file)

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(predictions == expected_predictions)
    assert np.allclose(proba_predictions, data)

    expected_info = (
        'HAT: max_byte_size: 33554432 - memory_estimate_period: 1000000 - grace_period: 200'
        ' - split_criterion: info_gain - split_confidence: 1e-07 - tie_threshold: 0.05'
        ' - binary_split: False - stop_mem_management: False - remove_poor_atts: False'
        ' - no_pre_prune: False - leaf_prediction: nba - nb_threshold: 0'
        ' - nominal_attributes: [5, 6, 7, 8, 9, 10, 11, 12, 13, 14] - '
    )
    assert learner.get_info() == expected_info

    # Several spellings of the same description are accepted: they differ
    # in the last digits of one float and in the order of the dict keys.
    expected_model_1 = 'Leaf = Class 1.0 | {0.0: 1367.3628584299263, 1.0: 1702.2738590243584,' \
                       ' 2.0: 952.1668539501372, 3.0: 822.1964285955778}\n'
    expected_model_2 = 'Leaf = Class 1.0 | {1.0: 1702.2738590243584, 2.0: 952.1668539501372,' \
                       ' 0.0: 1367.3628584299263, 3.0: 822.1964285955778}\n'
    expected_model_3 = 'Leaf = Class 1.0 | {1.0: 1702.2738590243584, 2.0: 952.16685395013724, ' \
                       '0.0: 1367.3628584299263, 3.0: 822.1964285955778}\n'  # Python 3.6
    expected_model_4 = 'Leaf = Class 1.0 | {0.0: 1367.3628584299263, 1.0: 1702.2738590243584,' \
                       ' 2.0: 952.16685395013724, 3.0: 822.1964285955778}\n'  # Python 3.4
    expected_models = (expected_model_1, expected_model_2,
                       expected_model_3, expected_model_4)
    # Build the description once instead of once per alternative.
    assert learner.get_model_description() in expected_models
def test_hat_nba(test_path):
    """Check HAT with ``leaf_prediction='nba'`` on a HyperplaneGenerator stream.

    The learner is trained one sample at a time; every ``wait_samples``
    samples (skipping sample 0) the prediction and class probabilities
    for the incoming sample are recorded before training on it, then
    compared with stored expectations.

    Parameters
    ----------
    test_path
        Directory that contains ``test_hoeffding_adaptive_tree_nba.npy``,
        the reference probabilities loaded with ``np.load``.
    """
    stream = HyperplaneGenerator(mag_change=0.001, noise_percentage=0.1,
                                 random_state=2)
    stream.prepare_for_use()

    learner = HAT(leaf_prediction='nba')

    max_samples = 5000
    wait_samples = 100
    y_pred = array('i')
    y_proba = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples (predict first, then train on the sample).
        if cnt != 0 and cnt % wait_samples == 0:
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [
        1, 0, 1, 0, 1, 1, 0, 1, 1, 1,
        0, 1, 0, 1, 0, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
        1, 0, 1, 1, 0, 0, 0, 0, 1, 1,
        1, 0, 0, 1, 0, 1, 1, 1, 0
    ])
    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'test_hoeffding_adaptive_tree_nba.npy')
    data = np.load(test_file)
    assert np.allclose(y_proba, data)

    # NOTE(review): the whitespace at the start of each continuation
    # fragment must match get_info() output exactly -- confirm.
    expected_info = (
        "HAT(binary_split=False, bootstrap_sampling=True, grace_period=200,\n"
        " leaf_prediction='nba', max_byte_size=33554432,\n"
        " memory_estimate_period=1000000, nb_threshold=0, no_preprune=False,\n"
        " nominal_attributes=None, remove_poor_atts=False, split_confidence=1e-07,\n"
        " split_criterion='info_gain', stop_mem_management=False, tie_threshold=0.05)"
    )
    assert learner.get_info() == expected_info

    # X is the last sample drawn in the loop above.
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)
# ### Train the classifier (model)

# In[2]:

# Build a HAT with its default settings and show its configuration.
clasificador = HAT()
print(clasificador.get_info())

# X_train / y_train are expected to be defined earlier in the script.
print("start training")
clasificador.fit(X_train, y_train, classes=None, sample_weight=None)
print("end training")


# In[3]:

# Predict labels for the held-out samples.
print("start predict")
predict = clasificador.predict(X_test)
print("end predict")


# In[4]:

# Report the shape of the predictions and the classifier's score.
print("shape_predict")
print(predict.shape)
print("score")
print(clasificador.score(X_test, y_test))

import matplotlib.pyplot as plt
#%matplotlib inline

# Histogram of the predicted values next to the true values.
plt.rcParams['figure.figsize'] = (16, 9)
plt.style.use('ggplot')
plt.hist([predict, y_test])
plt.show()