def test_evaluate_classification_coverage(tmpdir):
    """Coverage run of the prequential evaluator with all classification metrics.

    A seeded two-class random-tree stream is fed to a Hoeffding tree
    classifier for 1000 samples while nine metrics are tracked and a summary
    CSV is written under ``tmpdir``.  Only the current accuracy is asserted
    here; tests for the individual metrics are placed in the corresponding
    test module.
    """
    generator_settings = dict(
        tree_random_state=23,
        sample_random_state=12,
        n_classes=2,
        n_cat_features=2,
        n_num_features=5,
        n_categories_per_cat_feature=5,
        max_tree_depth=6,
        min_leaf_depth=3,
        fraction_leaves_per_level=0.15,
    )
    data_stream = RandomTreeGenerator(**generator_settings)

    # Every feature from index 15 onwards is declared nominal.
    nominal_columns = list(range(15, len(data_stream.feature_names)))
    tree = HoeffdingTreeClassifier(nominal_attributes=nominal_columns)

    summary_path = os.path.join(str(tmpdir), "prequential_summary.csv")
    prequential = EvaluatePrequential(
        max_samples=1000,
        metrics=[
            'accuracy',
            'kappa',
            'kappa_t',
            'kappa_m',
            'f1',
            'precision',
            'recall',
            'gmean',
            'true_vs_predicted',
        ],
        output_file=summary_path,
    )
    prequential.evaluate(stream=data_stream, model=tree)

    # The mean measurements are not checked in this test.
    _, current_scores = prequential.get_measurements(model_idx=0)
    assert np.isclose(current_scores.accuracy_score(), 0.685)
def test_evaluate_prequential_classifier(tmpdir, test_path):
    """Check prequential evaluation of a Hoeffding tree against stored references.

    Verifies, in order: the type of the first returned model and that its
    ``get_model_measurements`` matches the one on the learner passed in, the
    summary CSV written under ``tmpdir`` against the reference file in
    ``test_path``, the mean and current accuracy / kappa / kappa_t values,
    and the text returned by ``get_info()``.
    """
    data_stream = RandomTreeGenerator(
        tree_random_state=23,
        sample_random_state=12,
        n_classes=4,
        n_cat_features=2,
        n_num_features=5,
        n_categories_per_cat_feature=5,
        max_tree_depth=6,
        min_leaf_depth=3,
        fraction_leaves_per_level=0.15,
    )
    data_stream.prepare_for_use()

    # Every feature from index 15 onwards is declared nominal.
    nominal_columns = list(range(15, len(data_stream.feature_names)))
    tree = HoeffdingTree(nominal_attributes=nominal_columns)

    summary_path = os.path.join(str(tmpdir), "prequential_summary.csv")
    prequential = EvaluatePrequential(
        max_samples=1000,
        metrics=['accuracy', 'kappa', 'kappa_t'],
        output_file=summary_path,
    )

    returned_models = prequential.evaluate(stream=data_stream, model=tree)
    first_model = returned_models[0]
    assert isinstance(first_model, HoeffdingTree)
    assert tree.get_model_measurements == first_model.get_model_measurements

    reference_csv = os.path.join(test_path, 'prequential_summary.csv')
    compare_files(summary_path, reference_csv)

    mean_scores, current_scores = prequential.get_measurements(model_idx=0)

    # (measurement object, accuracy, kappa, kappa_t) -- mean first, then current.
    reference_values = (
        (mean_scores, 0.436250, 0.231791, 0.236887),
        (current_scores, 0.430000, 0.223909, 0.240000),
    )
    for scores, accuracy, kappa, kappa_t in reference_values:
        assert np.isclose(scores.get_accuracy(), accuracy)
        assert np.isclose(scores.get_kappa(), kappa)
        assert np.isclose(scores.get_kappa_t(), kappa_t)

    # NOTE(review): the continuation lines below start with a single space,
    # exactly as they appear in this file.  If get_info() pads continuation
    # lines to align under the opening parenthesis, these literals need the
    # matching padding -- confirm against the real output.
    info_text = (
        "EvaluatePrequential(batch_size=1, data_points_for_classification=False,\n"
        " max_samples=1000, max_time=inf,\n"
        " metrics=['accuracy', 'kappa', 'kappa_t'], n_wait=200,\n"
        " output_file='prequential_summary.csv',\n"
        " pretrain_size=200, restart_stream=True, show_plot=False)"
    )
    assert prequential.get_info() == info_text
def test_evaluate_classification_metrics():
    """Check f1, precision, recall and g-mean from a prequential evaluation.

    A seeded two-class random-tree stream is evaluated with a Hoeffding tree
    for 1000 samples.  Each mean and current metric value is printed, then
    compared with its stored reference value.
    """
    data_stream = RandomTreeGenerator(
        tree_random_state=23,
        sample_random_state=12,
        n_classes=2,
        n_cat_features=2,
        n_num_features=5,
        n_categories_per_cat_feature=5,
        max_tree_depth=6,
        min_leaf_depth=3,
        fraction_leaves_per_level=0.15,
    )
    data_stream.prepare_for_use()

    # Every feature from index 15 onwards is declared nominal.
    nominal_columns = list(range(15, len(data_stream.feature_names)))
    tree = HoeffdingTree(nominal_attributes=nominal_columns)

    prequential = EvaluatePrequential(
        max_samples=1000,
        metrics=['f1', 'precision', 'recall', 'gmean'],
    )
    prequential.evaluate(stream=data_stream, model=tree)
    mean_scores, current_scores = prequential.get_measurements(model_idx=0)

    # Print every measured value: mean block first, then the current block.
    printed_getters = ('get_g_mean', 'get_recall', 'get_precision', 'get_f1_score')
    for scores in (mean_scores, current_scores):
        for getter_name in printed_getters:
            print(getattr(scores, getter_name)())

    # Reference values, checked current-first and in f1/precision/recall/g-mean order.
    reference_values = (
        (current_scores, {
            'get_f1_score': 0.7096774193548387,
            'get_precision': 0.6814159292035398,
            'get_recall': 0.7403846153846154,
            'get_g_mean': 0.6802502367624613,
        }),
        (mean_scores, {
            'get_f1_score': 0.7009803921568628,
            'get_precision': 0.7185929648241206,
            'get_recall': 0.6842105263157895,
            'get_g_mean': 0.6954166367760247,
        }),
    )
    for scores, expected_by_getter in reference_values:
        for getter_name, expected in expected_by_getter.items():
            assert np.isclose(getattr(scores, getter_name)(), expected)
def test_data_stream(test_path):
    """Run a prequential evaluation of five learners on a CSV-backed stream.

    Loads ``data/data_n30000.csv`` from ``test_path`` into a ``DataStream``
    and evaluates three KNN variants, a Hoeffding tree and a naive Bayes
    learner together.  The evaluator output goes to ``data/kkn_output.csv``
    under ``test_path``.  The closing assertion is vacuous, so the test only
    verifies that the whole pipeline runs without raising.
    """
    csv_path = os.path.join(test_path, 'data/data_n30000.csv')
    frame = pd.read_csv(csv_path)
    data_stream = DataStream(frame, name='Test')

    # All three KNN variants share the same neighbourhood/window settings.
    knn_settings = dict(n_neighbors=8, max_window_size=2000, leaf_size=40)
    plain_knn = KNNClassifier(**knn_settings)
    weighted_knn = WeightedKNNClassifier(**knn_settings)
    standardized_knn = KNNClassifier(**knn_settings, standardize=True)

    # Every feature from index 15 onwards is declared nominal.
    nominal_columns = list(range(15, len(data_stream.feature_names)))
    hoeffding_tree = HoeffdingTreeClassifier(nominal_attributes=nominal_columns)
    naive_bayes = NaiveBayes()

    results_path = os.path.join(test_path, 'data/kkn_output.csv')
    prequential = EvaluatePrequential(
        metrics=['accuracy', 'kappa_m', 'kappa_t', 'recall'],
        output_file=results_path,
    )

    candidates = [
        plain_knn,
        weighted_knn,
        standardized_knn,
        hoeffding_tree,
        naive_bayes,
    ]
    prequential.evaluate(stream=data_stream, model=candidates)

    # Unpacked but not inspected; the two-value unpacking itself is retained.
    _mean_scores, _current_scores = prequential.get_measurements()
    assert 1 == 1