# Imports used by the test snippets below.
import os
from difflib import SequenceMatcher

import numpy as np
from sklearn.metrics import mean_absolute_error

from skmultiflow.data import RegressionGenerator
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.trees import MultiTargetRegressionHoeffdingTree


def test_hoeffding_tree_coverage(test_path):
    # Cover nominal attribute observer
    test_file = os.path.join(test_path, 'multi_target_regression_data.npz')
    data = np.load(test_file)
    X = data['X']
    Y = data['Y']

    learner = MultiTargetRegressionHoeffdingTree(
        leaf_prediction='mean', nominal_attributes=list(range(3)))
    learner.partial_fit(X, Y)
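    # A follow-up sketch (not in the original snippet), assuming the usual
    # skmultiflow API: once fitted, predict() returns one row of target
    # estimates per input sample.
    y_hat = learner.predict(X[:5])
    print(y_hat.shape)  # expected: (5, n_targets)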
Example #2
def test_hoeffding_tree_coverage(test_path):
    # Cover nominal attribute observer
    test_file = os.path.join(test_path, 'multi_target_regression_data.npz')
    data = np.load(test_file)
    X = data['X']
    Y = data['Y']

    # Invalid leaf prediction option
    learner = MultiTargetRegressionHoeffdingTree(
        leaf_prediction='MEAN', nominal_attributes=list(range(3)))
    print(learner.split_criterion)
    # Invalid split_criterion
    learner.split_criterion = 'ICVR'
    learner.partial_fit(X, Y)
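    # Hedged note: skmultiflow is expected to fall back to the default option
    # (and emit a warning) when given an unknown leaf_prediction or
    # split_criterion value, which is the code path this coverage test targets.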
Example #3
def test_evaluate_multi_target_regression_coverage(tmpdir):
    from skmultiflow.data import RegressionGenerator
    from skmultiflow.trees import MultiTargetRegressionHoeffdingTree

    max_samples = 1000

    # Stream
    stream = RegressionGenerator(n_samples=max_samples,
                                 n_features=20,
                                 n_informative=15,
                                 random_state=1,
                                 n_targets=7)
    stream.prepare_for_use()

    # Learner
    mtrht = MultiTargetRegressionHoeffdingTree(leaf_prediction='adaptive')

    output_file = os.path.join(str(tmpdir), "prequential_summary.csv")
    metrics = [
        'average_mean_square_error', 'average_mean_absolute_error',
        'average_root_mean_square_error'
    ]
    evaluator = EvaluatePrequential(max_samples=max_samples,
                                    metrics=metrics,
                                    output_file=output_file)

    evaluator.evaluate(stream=stream, model=mtrht, model_names=['MTRHT'])
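    # A hedged sketch (not part of the original snippet) of inspecting the
    # summary file written above; using pandas and treating '#' lines as
    # comments are assumptions about the layout of skmultiflow's output file.
    import pandas as pd
    results = pd.read_csv(output_file, comment='#')
    print(results.columns.tolist())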
Example #4
def test_multi_target_regression_hoeffding_tree_model_description():
    stream = RegressionGenerator(n_samples=700,
                                 n_features=20,
                                 n_informative=15,
                                 random_state=1,
                                 n_targets=3)
    stream.prepare_for_use()

    learner = MultiTargetRegressionHoeffdingTree(leaf_prediction='mean')

    max_samples = 700
    X, y = stream.next_sample(max_samples)
    # Trying to predict without fitting
    learner.predict(X[0])

    learner.partial_fit(X, y)

    expected_description = "if Attribute 11 <= 0.36737233297880056:\n" \
                            "  Leaf = Statistics {0: 450.0000, 1: [-23322.8079, -30257.1616, -18740.9462], " \
                            "2: [22242706.1751, 29895648.2424, 18855571.7943]}\n" \
                            "if Attribute 11 > 0.36737233297880056:\n" \
                            "  Leaf = Statistics {0: 250.0000, 1: [33354.8675, 32390.6094, 22886.4176], " \
                            "2: [15429435.6709, 17908472.4289, 10709746.1079]}\n" \

    assert SequenceMatcher(None, expected_description,
                           learner.get_model_description()).ratio() > 0.9
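    # Hedged note: the assertion above uses a fuzzy SequenceMatcher ratio
    # (> 0.9) instead of exact string equality, presumably to tolerate small
    # numerical differences in the leaf statistics across platforms and
    # library versions.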
def test_multi_target_regression_hoeffding_tree_mean(test_path):
    stream = RegressionGenerator(n_samples=2000,
                                 n_features=20,
                                 n_informative=15,
                                 random_state=1,
                                 n_targets=3)
    stream.prepare_for_use()

    learner = MultiTargetRegressionHoeffdingTree(leaf_prediction='mean')

    cnt = 0
    max_samples = 2000
    wait_samples = 200
    y_pred = np.zeros((int(max_samples / wait_samples), 3))
    y_true = np.zeros((int(max_samples / wait_samples), 3))

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred[int(cnt / wait_samples), :] = learner.predict(X)
            y_true[int(cnt / wait_samples), :] = y
        learner.partial_fit(X, y)
        cnt += 1

    test_file = os.path.join(
        test_path, 'expected_preds_multi_target_regression_mean.npy')
    expected_predictions = np.load(test_file)

    assert np.allclose(y_pred, expected_predictions)

    error = mean_absolute_error(y_true, y_pred)
    expected_error = 191.2823924547882
    assert np.isclose(error, expected_error)

    expected_info = "MultiTargetRegressionHoeffdingTree(binary_split=False, grace_period=200,\n" \
                    "                                   leaf_prediction='mean',\n" \
                    "                                   learning_ratio_const=True,\n" \
                    "                                   learning_ratio_decay=0.001,\n" \
                    "                                   learning_ratio_perceptron=0.02,\n" \
                    "                                   max_byte_size=33554432,\n" \
                    "                                   memory_estimate_period=1000000,\n" \
                    "                                   nb_threshold=0, no_preprune=False,\n" \
                    "                                   nominal_attributes=None, random_state=None,\n" \
                    "                                   remove_poor_atts=False,\n" \
                    "                                   split_confidence=1e-07,\n" \
                    "                                   stop_mem_management=False,\n" \
                    "                                   tie_threshold=0.05)"
    assert learner.get_info() == expected_info
    assert isinstance(learner.get_model_description(), type(''))
    assert type(learner.predict(X)) == np.ndarray
def test_multi_target_regression_hoeffding_tree_mean(test_path):
    stream = RegressionGenerator(n_samples=500,
                                 n_features=20,
                                 n_informative=15,
                                 random_state=1,
                                 n_targets=3)
    stream.prepare_for_use()

    learner = MultiTargetRegressionHoeffdingTree(leaf_prediction='mean')

    cnt = 0
    max_samples = 500
    wait_samples = 10
    y_pred = np.zeros((int(max_samples / wait_samples), 3))
    y_true = np.zeros((int(max_samples / wait_samples), 3))

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred[int(cnt / wait_samples), :] = learner.predict(X)
            y_true[int(cnt / wait_samples), :] = y
        learner.partial_fit(X, y)
        cnt += 1

    test_file = os.path.join(
        test_path, 'expected_preds_multi_target_regression_mean.npy')
    expected_predictions = np.load(test_file)

    # print(expected_predictions.shape)
    assert np.allclose(y_pred, expected_predictions)

    error = mean_absolute_error(y_true, y_pred)
    expected_error = 167.40626294018753
    assert np.isclose(error, expected_error)

    expected_info = \
        'MultiTargetRegressionHoeffdingTree: max_byte_size: 33554432 - ' \
        'memory_estimate_period: 1000000 - grace_period: 200 - ' \
        'split_criterion: intra cluster variance reduction - ' \
        'split_confidence: 1e-07 - tie_threshold: 0.05 - binary_split: False' \
        ' - stop_mem_management: False - remove_poor_atts: False ' \
        '- no_pre_prune: False - leaf_prediction: mean - nb_threshold: 0 - ' \
        'nominal_attributes: [] - '
    assert learner.get_info() == expected_info
    assert isinstance(learner.get_model_description(), type(''))
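    # Hedged note: this variant and the earlier
    # test_multi_target_regression_hoeffding_tree_mean expect different
    # get_info() formats (an sklearn-style repr vs. the older dash-separated
    # string); these presumably come from different skmultiflow releases, so
    # only the variant matching the installed version will pass.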
Example #7
def test_multi_target_regression_hoeffding_tree_categorical_features(
        test_path):
    data_path = os.path.join(test_path, 'ht_categorical_features_testcase.npy')
    stream = np.load(data_path)
    X, y = stream[:, :-2], stream[:, -2:]

    nominal_attr_idx = np.arange(8)
    learner = MultiTargetRegressionHoeffdingTree(
        nominal_attributes=nominal_attr_idx, leaf_prediction='perceptron')

    learner.partial_fit(X, y)

    expected_description = "if Attribute 0 = -15.0:\n" \
                           "  if Attribute 3 = 0.0:\n" \
                           "    Leaf = Statistics {0: 80.0000, 1: [-192.4417, 80.0908], 2: [464.1268, 80.1882]}\n" \
                           "  if Attribute 3 = 1.0:\n" \
                           "    Leaf = Statistics {0: 77.0000, 1: [-184.8333, -7.2503], 2: [444.9068, 42.7423]}\n" \
                           "  if Attribute 3 = 2.0:\n" \
                           "    Leaf = Statistics {0: 56.0000, 1: [-134.1829, -1.0863], 2: [322.1336, 28.1218]}\n" \
                           "  if Attribute 3 = 3.0:\n" \
                           "    Leaf = Statistics {0: 62.0000, 1: [-148.2397, -17.2837], 2: [355.5327, 38.6913]}\n" \
                           "if Attribute 0 = 0.0:\n" \
                           "  Leaf = Statistics {0: 672.0000, 1: [390.6773, 672.0472], 2: [761.0744, 672.1686]}\n" \
                           "if Attribute 0 = 1.0:\n" \
                           "  Leaf = Statistics {0: 644.0000, 1: [671.3479, 174.3011], 2: [927.5194, 466.7064]}\n" \
                           "if Attribute 0 = 2.0:\n" \
                           "  Leaf = Statistics {0: 619.0000, 1: [867.2865, 320.6506], 2: [1262.0992, 435.2835]}\n" \
                           "if Attribute 0 = 3.0:\n" \
                           "  Leaf = Statistics {0: 627.0000, 1: [987.0864, 331.0822], 2: [1583.8108, 484.0456]}\n" \
                           "if Attribute 0 = -30.0:\n" \
                           "  Leaf = Statistics {0: 88.0000, 1: [-269.7967, 25.9328], 2: [828.2289, 57.6501]}\n"

    assert SequenceMatcher(None, expected_description,
                           learner.get_model_description()).ratio() > 0.9

    new_sample = X[0].copy()
    # Adding a new category on the split feature
    new_sample[0] = 7
    learner.predict([new_sample])

    # Let's do the same considering other prediction strategy
    learner = MultiTargetRegressionHoeffdingTree(
        nominal_attributes=nominal_attr_idx, leaf_prediction='adaptive')

    learner.partial_fit(X, y)
    learner.predict([new_sample])
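    # Hedged note: the value 7 assigned above never occurs for Attribute 0 in
    # the training data, so these predict() calls presumably exercise the
    # handling of a previously unseen category at a nominal split (and in the
    # adaptive leaf model) rather than checking a specific output value.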