예제 #1
0
def test_evaluate_delayed_coverage(tmpdir):
    """Smoke-test delayed prequential evaluation of a Naive Bayes model.

    Draws 1000 samples from a seeded ``SEAGenerator``, attaches generated
    timestamps, and evaluates the model through ``EvaluatePrequentialDelayed``
    with the ``running_time`` and ``model_size`` metrics and
    ``data_points_for_classification`` enabled. Nothing is asserted: the
    test passes as long as the evaluation completes without raising.

    Parameters
    ----------
    tmpdir
        Directory (converted with ``str``) where the summary CSV is written.
    """
    from skmultiflow.data import SEAGenerator
    from skmultiflow.bayes import NaiveBayes

    n_samples = 1000

    # Materialise the whole sample set up front so it can be paired with
    # one timestamp per sample.
    generator = SEAGenerator(random_state=1)
    features, labels = generator.next_sample(n_samples)
    timestamps = generate_random_dates(seed=1, samples=n_samples)

    # Temporal stream built with ordered=False.
    temporal_stream = TemporalDataStream(
        features, labels, timestamps, ordered=False
    )

    classifier = NaiveBayes()

    summary_path = os.path.join(
        str(tmpdir), "prequential_delayed_summary.csv"
    )
    delayed_evaluator = EvaluatePrequentialDelayed(
        max_samples=n_samples,
        metrics=['running_time', 'model_size'],
        data_points_for_classification=True,
        output_file=summary_path,
    )

    delayed_evaluator.evaluate(
        stream=temporal_stream, model=classifier, model_names=['NB']
    )
예제 #2
0
def test_evaluate_delayed_multi_target_regression_coverage(tmpdir):
    """Smoke-test delayed prequential evaluation for multi-target regression.

    Draws 1000 samples from a seeded ``RegressionGenerator`` configured with
    20 features, 15 informative features and 7 targets, then evaluates an
    ``iSOUPTreeRegressor`` (``leaf_prediction='adaptive'``) with the averaged
    MSE / MAE / RMSE metrics. Nothing is asserted: the test passes as long as
    the evaluation completes without raising.

    Parameters
    ----------
    tmpdir
        Directory (converted with ``str``) where the summary CSV is written.
    """
    from skmultiflow.data import RegressionGenerator
    from skmultiflow.trees import iSOUPTreeRegressor

    n_samples = 1000

    # Seeded generator; all samples are drawn in one call below.
    generator = RegressionGenerator(
        n_samples=n_samples,
        n_features=20,
        n_informative=15,
        random_state=1,
        n_targets=7,
    )
    features, targets = generator.next_sample(n_samples)
    timestamps = generate_random_dates(seed=1, samples=n_samples)

    # Temporal stream built with ordered=False.
    temporal_stream = TemporalDataStream(
        features, targets, timestamps, ordered=False
    )

    regressor = iSOUPTreeRegressor(leaf_prediction='adaptive')

    summary_path = os.path.join(
        str(tmpdir), "prequential_delayed_summary.csv"
    )
    tracked_metrics = [
        'average_mean_square_error',
        'average_mean_absolute_error',
        'average_root_mean_square_error',
    ]
    delayed_evaluator = EvaluatePrequentialDelayed(
        max_samples=n_samples,
        metrics=tracked_metrics,
        output_file=summary_path,
    )

    delayed_evaluator.evaluate(
        stream=temporal_stream, model=regressor, model_names=['MTRHT']
    )
예제 #3
0
def test_evaluate_delayed_multi_target_classification_coverage(tmpdir):
    """Smoke-test delayed prequential evaluation for multi-label classification.

    Draws 1000 samples from a seeded ``MultilabelGenerator`` and evaluates a
    default ``MultiOutputLearner`` (passed as a one-element model list) with
    the Hamming score, Hamming loss, exact match and J-index metrics.
    This is coverage only; metric correctness is tested in the metrics'
    own test module, so nothing is asserted here.

    Parameters
    ----------
    tmpdir
        Directory (converted with ``str``) where the summary CSV is written.
    """
    from skmultiflow.data import MultilabelGenerator
    from skmultiflow.meta import MultiOutputLearner

    n_samples = 1000

    # Seeded generator; all samples are drawn in one call below.
    generator = MultilabelGenerator(n_samples=n_samples, random_state=1)
    features, label_sets = generator.next_sample(n_samples)
    timestamps = generate_random_dates(seed=1, samples=n_samples)

    # Temporal stream built with ordered=True.
    temporal_stream = TemporalDataStream(
        features, label_sets, timestamps, ordered=True
    )

    multi_output_model = MultiOutputLearner()

    summary_path = os.path.join(
        str(tmpdir), "prequential_delayed_summary.csv"
    )
    delayed_evaluator = EvaluatePrequentialDelayed(
        max_samples=n_samples,
        metrics=['hamming_score', 'hamming_loss', 'exact_match', 'j_index'],
        output_file=summary_path,
    )

    delayed_evaluator.evaluate(
        stream=temporal_stream,
        model=[multi_output_model],
        model_names=['MOL1'],
    )
예제 #4
0
def test_evaluate_delayed_regression_coverage(tmpdir):
    """Smoke-test delayed prequential evaluation for single-target regression.

    Draws 1000 samples from a ``RegressionGenerator`` and evaluates a default
    ``HoeffdingTreeRegressor`` with the mean square error and mean absolute
    error metrics. This is coverage only; metric correctness is tested in
    the metrics' own test module, so nothing is asserted here.

    Parameters
    ----------
    tmpdir
        Directory (converted with ``str``) where the summary CSV is written.
    """
    from skmultiflow.data import RegressionGenerator
    from skmultiflow.trees import HoeffdingTreeRegressor

    n_samples = 1000

    # Only the sample count is configured on the generator here.
    generator = RegressionGenerator(n_samples=n_samples)
    features, targets = generator.next_sample(n_samples)
    timestamps = generate_random_dates(seed=1, samples=n_samples)

    # Temporal stream built with ordered=False.
    temporal_stream = TemporalDataStream(
        features, targets, timestamps, ordered=False
    )

    regressor = HoeffdingTreeRegressor()

    summary_path = os.path.join(
        str(tmpdir), "prequential_delayed_summary.csv"
    )
    delayed_evaluator = EvaluatePrequentialDelayed(
        max_samples=n_samples,
        metrics=['mean_square_error', 'mean_absolute_error'],
        output_file=summary_path,
    )

    delayed_evaluator.evaluate(
        stream=temporal_stream, model=regressor, model_names=['HTR']
    )
예제 #5
0
def test_evaluate_delayed_classification_single_time_delay(tmpdir):
    """Check current accuracy when one time-based delay is applied to all samples.

    Draws 1000 samples from a seeded two-class ``RandomTreeGenerator``, wraps
    them in a ``TemporalDataStream`` with a single ``sample_delay`` of 30 days
    (``ordered=False``), evaluates a ``HoeffdingTreeClassifier`` and asserts
    that the current accuracy reported by the evaluator is close to 0.715.

    Parameters
    ----------
    tmpdir
        Directory (converted with ``str``) where the summary CSV is written.
    """
    generator = RandomTreeGenerator(
        tree_random_state=23,
        sample_random_state=12,
        n_classes=2,
        n_cat_features=2,
        n_num_features=5,
        n_categories_per_cat_feature=5,
        max_tree_depth=6,
        min_leaf_depth=3,
        fraction_leaves_per_level=0.15,
    )
    n_samples = 1000

    # Draw the samples, cast the labels to integers, and generate one
    # timestamp per sample.
    features, labels = generator.next_sample(n_samples)
    labels = labels.astype(int)
    timestamps = generate_random_dates(seed=1, samples=n_samples)

    # One delay value shared by the whole stream.
    temporal_stream = TemporalDataStream(
        features,
        labels,
        timestamps,
        sample_delay=np.timedelta64(30, "D"),
        ordered=False,
    )

    # NOTE(review): the generator is configured with 5 numeric features and
    # 2 categorical features of 5 categories each; if that yields exactly 15
    # feature names, this index range is empty — confirm that is intended.
    nominal_indices = list(range(15, len(generator.feature_names)))
    classifier = HoeffdingTreeClassifier(nominal_attributes=nominal_indices)

    summary_path = os.path.join(
        str(tmpdir), "prequential_delayed_summary.csv"
    )
    tracked_metrics = [
        'accuracy', 'kappa', 'kappa_t', 'kappa_m', 'f1', 'precision', 'recall',
        'gmean', 'true_vs_predicted',
    ]
    delayed_evaluator = EvaluatePrequentialDelayed(
        max_samples=n_samples,
        metrics=tracked_metrics,
        output_file=summary_path,
    )

    delayed_evaluator.evaluate(stream=temporal_stream, model=classifier)

    # Only the "current" measurements are checked; the mean ones are unused.
    _, current_scores = delayed_evaluator.get_measurements(model_idx=0)

    assert np.isclose(current_scores.accuracy_score(), 0.715)
예제 #6
0
def test_evaluate_prequential_delayed_classifier(tmpdir, test_path):
    """End-to-end check of delayed prequential evaluation for a classifier.

    Draws 1000 samples from a seeded four-class ``RandomTreeGenerator``,
    wraps them in a ``TemporalDataStream`` (``ordered=True``) and evaluates a
    ``HoeffdingTreeClassifier``. The test then verifies:

    * the evaluator returns the trained model, with the same
      ``model_measurements`` as the learner that was passed in;
    * the written summary CSV matches the reference file under ``test_path``;
    * mean and current accuracy / kappa / kappa_t match pinned values;
    * ``get_info()`` matches the expected text once whitespace is normalised.

    Parameters
    ----------
    tmpdir
        Directory (converted with ``str``) where the summary CSV is written.
    test_path
        Directory holding the reference ``prequential_delayed_summary.csv``.
    """
    # Seeded random-tree generator used as the data source.
    generator = RandomTreeGenerator(
        tree_random_state=23,
        sample_random_state=12,
        n_classes=4,
        n_cat_features=2,
        n_num_features=5,
        n_categories_per_cat_feature=5,
        max_tree_depth=6,
        min_leaf_depth=3,
        fraction_leaves_per_level=0.15,
    )
    n_samples = 1000

    # Draw the samples, cast the labels to integers, and generate one
    # timestamp per sample.
    features, labels = generator.next_sample(n_samples)
    labels = labels.astype(int)
    timestamps = generate_random_dates(seed=1, samples=n_samples)

    temporal_stream = TemporalDataStream(
        features, labels, timestamps, ordered=True
    )

    # NOTE(review): the generator is configured with 5 numeric features and
    # 2 categorical features of 5 categories each; if that yields exactly 15
    # feature names, this index range is empty — confirm that is intended.
    nominal_indices = list(range(15, len(generator.feature_names)))
    learner = HoeffdingTreeClassifier(nominal_attributes=nominal_indices)

    summary_path = os.path.join(
        str(tmpdir), "prequential_delayed_summary.csv"
    )
    delayed_evaluator = EvaluatePrequentialDelayed(
        max_samples=n_samples,
        metrics=['accuracy', 'kappa', 'kappa_t'],
        output_file=summary_path,
    )

    # The model is passed as a one-element list; the result is indexed the
    # same way.
    trained_models = delayed_evaluator.evaluate(
        stream=temporal_stream, model=[learner]
    )
    trained_learner = trained_models[0]

    assert isinstance(trained_learner, HoeffdingTreeClassifier)

    assert learner.model_measurements == trained_learner.model_measurements

    # The generated summary must match the stored reference file.
    reference_path = os.path.join(test_path, 'prequential_delayed_summary.csv')
    compare_files(summary_path, reference_path)

    mean_scores, current_scores = delayed_evaluator.get_measurements(
        model_idx=0
    )

    # Pinned values only; the metrics themselves are tested in their own
    # test module.
    assert np.isclose(mean_scores.accuracy_score(), 0.436250)
    assert np.isclose(mean_scores.kappa_score(), 0.231791)
    assert np.isclose(mean_scores.kappa_t_score(), 0.236886)

    assert np.isclose(current_scores.accuracy_score(), 0.430000)
    assert np.isclose(current_scores.kappa_score(), 0.223909)
    assert np.isclose(current_scores.kappa_t_score(), 0.240000)

    wanted_info = (
        "EvaluatePrequentialDelayed(batch_size=1, "
        "data_points_for_classification=False, max_samples=1000, max_time=inf, "
        "metrics=['accuracy', 'kappa', 'kappa_t'], n_wait=200, "
        "output_file='prequential_delayed_summary.csv', pretrain_size=200, "
        "restart_stream=True, show_plot=False)"
    )
    # Collapse every run of whitespace (including newlines) to one space.
    actual_info = " ".join(delayed_evaluator.get_info().split())
    assert actual_info == wanted_info