예제 #1
0
def test_isoup_tree_model_description():
    """Check the textual model description of a mean-leaf iSOUP-Tree.

    Draws 700 samples with 3 targets from a seeded ``RegressionGenerator``,
    fits an ``iSOUPTreeRegressor`` on the whole batch and compares
    ``get_model_description()`` with a reference string. The comparison is
    fuzzy (``SequenceMatcher`` ratio above 0.9), not an exact match.
    """
    stream = RegressionGenerator(n_samples=700,
                                 n_features=20,
                                 n_informative=15,
                                 random_state=1,
                                 n_targets=3)
    stream.prepare_for_use()

    learner = iSOUPTreeRegressor(leaf_prediction='mean')

    max_samples = 700
    X, y = stream.next_sample(max_samples)
    # Predict before any fit; the test fails here if this call raises.
    learner.predict(X[0])

    learner.partial_fit(X, y)

    # A parenthesised literal avoids backslash continuations (the original
    # ended with a dangling one that silently swallowed the next line).
    expected_description = (
        "if Attribute 11 <= 0.36737233297880056:\n"
        "  Leaf = Statistics {0: 450.0000, 1: [-23322.8079, -30257.1616, -18740.9462], "
        "2: [22242706.1751, 29895648.2424, 18855571.7943]}\n"
        "if Attribute 11 > 0.36737233297880056:\n"
        "  Leaf = Statistics {0: 250.0000, 1: [33354.8675, 32390.6094, 22886.4176], "
        "2: [15429435.6709, 17908472.4289, 10709746.1079]}\n"
    )

    assert SequenceMatcher(None, expected_description,
                           learner.get_model_description()).ratio() > 0.9
예제 #2
0
def test_evaluate_multi_target_regression_coverage(tmpdir):
    """Smoke-test prequential evaluation of a multi-target regressor.

    Runs ``EvaluatePrequential`` over 1000 generated samples (7 targets) with
    an adaptive-leaf ``MultiTargetRegressionHoeffdingTree`` and writes the
    summary CSV under ``tmpdir``. Nothing is asserted; the test passes when
    the evaluation completes without raising.
    """
    from skmultiflow.data import RegressionGenerator
    from skmultiflow.trees import MultiTargetRegressionHoeffdingTree

    n_samples = 1000
    summary_path = os.path.join(str(tmpdir), "prequential_summary.csv")

    # Seeded synthetic stream with seven regression targets.
    data_stream = RegressionGenerator(
        n_samples=n_samples,
        n_features=20,
        n_informative=15,
        random_state=1,
        n_targets=7,
    )
    data_stream.prepare_for_use()

    model = MultiTargetRegressionHoeffdingTree(leaf_prediction='adaptive')

    evaluator = EvaluatePrequential(
        max_samples=n_samples,
        metrics=[
            'average_mean_square_error',
            'average_mean_absolute_error',
            'average_root_mean_square_error',
        ],
        output_file=summary_path,
    )
    evaluator.evaluate(stream=data_stream, model=model, model_names=['MTRHT'])
예제 #3
0
def demo(output_file=None):
    """ Test iSOUP-Tree

    Evaluate an iSOUP-Tree multi-target regressor prequentially on a
    synthetic regression stream (5000 samples, 7 targets).

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    """
    averaged_metrics = [
        'average_mean_square_error',
        'average_mean_absolute_error',
        'average_root_mean_square_error',
    ]

    # Seeded synthetic data source.
    data_stream = RegressionGenerator(
        n_samples=5000,
        n_features=20,
        n_informative=15,
        random_state=1,
        n_targets=7,
    )
    data_stream.prepare_for_use()

    model = iSOUPTreeRegressor(leaf_prediction='adaptive')

    # Prequential evaluation, one sample at a time, plotting disabled.
    runner = EvaluatePrequential(
        pretrain_size=1,
        batch_size=1,
        n_wait=200,
        max_time=1000,
        output_file=output_file,
        show_plot=False,
        metrics=averaged_metrics,
    )
    runner.evaluate(stream=data_stream, model=model)
def test_hoeffding_tree():
    """Regression test for ``HoeffdingAdaptiveTreeRegressor`` with mean leaves.

    Streams 500 seeded samples one at a time in test-then-train order. Every
    10th sample (except the first) is predicted before being learned, which
    yields 49 predictions. These are compared with stored reference values,
    together with the resulting mean absolute error, the ``get_info()``
    string and the return types of ``get_model_description()`` / ``predict``.
    """
    stream = RegressionGenerator(n_samples=500,
                                 n_features=20,
                                 n_informative=15,
                                 random_state=1)
    stream.prepare_for_use()

    learner = HoeffdingAdaptiveTreeRegressor(leaf_prediction='mean',
                                             random_state=1)

    max_samples = 500
    y_pred = array('d')
    y_true = array('d')
    wait_samples = 10

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples, always before training on the sample.
        if cnt != 0 and cnt % wait_samples == 0:
            y_pred.append(learner.predict(X)[0])
            y_true.append(y[0])
        learner.partial_fit(X, y)

    expected_predictions = array('d', [
        102.38946041769101, 55.6584574987656, 5.746076599168373,
        17.11797209372667, 2.566888222752787, 9.188247802192826,
        17.87894804676911, 15.940629626883966, 8.981172175448485,
        13.152624115190092, 11.106058099429399, 6.473195313058236,
        4.723621479590173, 13.825568609556493, 8.698873073880696,
        1.6452441811010252, 5.123496188584294, 6.34387187194982,
        5.9977733790395105, 6.874251577667707, 4.605348088338317,
        8.20112636572672, 9.032631648758098, 4.428189978974459,
        4.249801041367518, 9.983272668044492, 12.859518508979734,
        11.741395774380285, 11.230028410261868, 9.126921979081521,
        9.132146661688296, 7.750655625124709, 6.445145118245414,
        5.760928671876355, 4.041291302080659, 3.591837600560529,
        0.7640424010500604, 0.1738639840537784, 2.2068337802212286,
        -81.05302946841077, 96.17757415335177, -77.35894903819677,
        95.85568683733698, 99.1981674250886, 99.89327888035015,
        101.66673013734784, -79.1904234513751, -80.42952143783687,
        100.63954789983896
    ])
    assert np.allclose(y_pred, expected_predictions)

    error = mean_absolute_error(y_true, y_pred)
    expected_error = 143.11351404083086
    assert np.isclose(error, expected_error)

    expected_info = "HoeffdingAdaptiveTreeRegressor(binary_split=False, grace_period=200, leaf_prediction='mean', " \
                    "learning_ratio_const=True, learning_ratio_decay=0.001, learning_ratio_perceptron=0.02, " \
                    "max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0, no_preprune=False, " \
                    "nominal_attributes=None, random_state=1, remove_poor_atts=False, split_confidence=1e-07, " \
                    "stop_mem_management=False, tie_threshold=0.05)"
    # str.split() already drops surrounding whitespace, so joining its pieces
    # collapses every whitespace run (including line breaks) to one space.
    info = " ".join(learner.get_info().split())
    assert info == expected_info

    assert isinstance(learner.get_model_description(), str)
    # X still holds the last sample drawn in the loop above.
    assert isinstance(learner.predict(X), np.ndarray)
예제 #5
0
def demo(input_file, output_file=None):
    """ _test_mtr_regression

    Evaluate a multi-target regressor wrapped in a single-step pipeline. The
    data comes from a seeded synthetic ``RegressionGenerator`` stream (5000
    samples, 7 targets).

    Parameters
    ----------
    input_file: string
        A string describing the path for the input dataset. It is not read
        anywhere in this function.

    output_file: string
        The name of the csv output file

    """
    averaged_metrics = ['average_mean_square_error',
                        'average_mean_absolute_error',
                        'average_root_mean_square_error']

    data_stream = RegressionGenerator(
        n_samples=5000,
        n_features=20,
        n_informative=15,
        random_state=1,
        n_targets=7,
    )
    data_stream.prepare_for_use()

    # Single-step pipeline around the adaptive-leaf tree.
    regressor = MultiTargetRegressionHoeffdingTree(leaf_prediction='adaptive')
    pipeline = Pipeline([('Classifier', regressor)])

    # Prequential evaluation, one sample at a time, plotting disabled.
    runner = EvaluatePrequential(
        pretrain_size=1,
        batch_size=1,
        n_wait=200,
        max_time=1000,
        output_file=output_file,
        show_plot=False,
        metrics=averaged_metrics,
    )
    runner.evaluate(stream=data_stream, model=pipeline)
예제 #6
0
def test_hoeffding_tree_regressor_perceptron():
    """Regression test for ``HoeffdingTreeRegressor`` with perceptron leaves.

    Streams 500 seeded samples one at a time in test-then-train order. Every
    10th sample (except the first) is predicted before being learned, which
    yields 49 predictions. These are compared with stored reference values,
    together with the resulting mean absolute error, the ``get_info()``
    string and the return types of ``get_model_description()`` / ``predict``.
    """
    stream = RegressionGenerator(n_samples=500,
                                 n_features=20,
                                 n_informative=15,
                                 random_state=1)
    stream.prepare_for_use()

    learner = HoeffdingTreeRegressor(leaf_prediction='perceptron',
                                     random_state=1)

    max_samples = 500
    y_pred = array('d')
    y_true = array('d')
    wait_samples = 10

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples, always before training on the sample.
        if cnt != 0 and cnt % wait_samples == 0:
            y_pred.append(learner.predict(X)[0])
            y_true.append(y[0])
        learner.partial_fit(X, y)

    expected_predictions = array('d', [
        1198.4326121743168, 456.36607750881586, 927.9912160545144,
        1160.4797981899128, 506.50541829176535, -687.8187227095925,
        -677.8120094065415, 231.14888704761225, -284.46324039942937,
        -255.69195985557175, 47.58787439365423, -135.22494016284043,
        -10.351457437330152, 164.95903200643997, 360.72854984472383,
        193.30633911830088, -64.23638301570358, 587.9771578214296,
        649.8395655757931, 481.01214222804026, 305.4402728117724,
        266.2096493865043, -445.11447171009775, -567.5748694154349,
        -68.70070048021438, -446.79910655850153, -115.892348067663,
        -98.26862866231015, 71.04707905920286, -10.239274802165584,
        18.748731569441812, 4.971217265129857, 172.2223575990573,
        -655.2864976783711, -129.69921313686626, -114.01187375876822,
        -405.66166686550963, -215.1264381928009, -345.91020370426247,
        -80.49330468453074, 108.78958382083302, 134.95267043280126,
        -398.5273538477553, -157.1784910649728, 219.72541225645654,
        -100.91598162899217, 80.9768574308987, -296.8856956382453,
        251.9332271253148
    ])
    assert np.allclose(y_pred, expected_predictions)

    error = mean_absolute_error(y_true, y_pred)
    expected_error = 362.98595964244623
    assert np.isclose(error, expected_error)

    expected_info = "HoeffdingTreeRegressor(binary_split=False, grace_period=200, leaf_prediction='perceptron', " \
                    "learning_ratio_const=True, learning_ratio_decay=0.001, learning_ratio_perceptron=0.02, " \
                    "max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0, no_preprune=False, " \
                    "nominal_attributes=None, random_state=1, remove_poor_atts=False, split_confidence=1e-07, " \
                    "stop_mem_management=False, tie_threshold=0.05)"
    # str.split() already drops surrounding whitespace, so joining its pieces
    # collapses every whitespace run (including line breaks) to one space.
    info = " ".join(learner.get_info().split())
    assert info == expected_info

    assert isinstance(learner.get_model_description(), str)
    # X still holds the last sample drawn in the loop above.
    assert isinstance(learner.predict(X), np.ndarray)
def test_hoeffding_tree_regressor_perceptron():
    """Regression test for ``HoeffdingTreeRegressor`` with perceptron leaves.

    Streams 500 seeded samples one at a time in test-then-train order. Every
    10th sample (except the first) is predicted before being learned. Only
    the aggregate mean absolute error, the ``get_info()`` string and the
    return types of ``get_model_description()`` / ``predict`` are asserted.
    The element-wise comparison of predictions is currently disabled.
    """
    stream = RegressionGenerator(n_samples=500, n_features=20, n_informative=15, random_state=1)
    stream.prepare_for_use()

    learner = HoeffdingTreeRegressor(leaf_prediction='perceptron', random_state=1)

    max_samples = 500
    y_pred = array('d')
    y_true = array('d')
    wait_samples = 10

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples, always before training on the sample.
        if cnt != 0 and cnt % wait_samples == 0:
            y_pred.append(learner.predict(X)[0])
            y_true.append(y[0])
        learner.partial_fit(X, y)

    # Reference values for the element-wise check, which is not executed.
    # TODO(review): the original had `np.allclose(y_pred, expected_predictions)`
    # commented out with no stated reason; confirm these values and re-enable
    # it, or drop the array.
    expected_predictions = array('d', [525.7553636732247, 352.8160300365902, 224.80744320456478,
                                       193.72837054292074, 132.6059603765031, 117.06974933197759,
                                       114.53342429855932, 89.37195405567235, 57.85335051891305,
                                       60.00883955911155, 47.263185779784266, 25.17616431074491,
                                       17.43259526890146, 47.33468996498019, 22.83975208548138,
                                       -7.659282840823236, 8.564101665071064, 14.61585289361161,
                                       11.560941733770441, 13.70120291865976, 1.1938438210799651,
                                       19.01970713481836, 21.23459424444584, -5.667473522309328,
                                       -5.203149619381393, 28.726275200889173, 41.03406433337882,
                                       27.950322712127267, 21.267116786963925, 5.53344652490152,
                                       6.753264259267268, -2.3288137435962213, -10.492766334689875,
                                       -11.19641058176631, -20.134685945295644, -19.36581990084085,
                                       -38.26894947177957, -34.90246284430353, -11.019543212232008,
                                       -22.016714766708127, -18.710456277443544, -20.5568019328217,
                                       -2.636583876625667, 24.787714491718187, 29.325261678088406,
                                       45.31267371823666, -48.271054430207776, -59.7649172085901,
                                       48.22724814037523])

    error = mean_absolute_error(y_true, y_pred)
    expected_error = 152.12931270533377
    assert np.isclose(error, expected_error)

    expected_info = "HoeffdingTreeRegressor(binary_split=False, grace_period=200, leaf_prediction='perceptron', " \
                    "learning_ratio_const=True, learning_ratio_decay=0.001, learning_ratio_perceptron=0.02, " \
                    "max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0, no_preprune=False, " \
                    "nominal_attributes=None, random_state=1, remove_poor_atts=False, split_confidence=1e-07, " \
                    "stop_mem_management=False, tie_threshold=0.05)"
    # str.split() already drops surrounding whitespace, so joining its pieces
    # collapses every whitespace run (including line breaks) to one space.
    info = " ".join(learner.get_info().split())
    assert info == expected_info

    assert isinstance(learner.get_model_description(), str)
    # X still holds the last sample drawn in the loop above.
    assert isinstance(learner.predict(X), np.ndarray)
예제 #8
0
def test_stacked_single_target_hoeffding_tree_regressor_adaptive(test_path):
    """Regression test for the stacked single-target tree, adaptive leaves.

    Streams 2000 seeded samples (3 targets) in test-then-train order and
    records a prediction every 200 samples. The recorded predictions are
    compared with a stored ``.npy`` file under ``test_path``; the mean
    absolute error and the exact ``get_info()`` text are checked as well.
    """
    n_targets = 3
    max_samples = 2000
    wait_samples = 200
    n_rows = max_samples // wait_samples

    stream = RegressionGenerator(n_samples=2000,
                                 n_features=20,
                                 n_informative=15,
                                 random_state=1,
                                 n_targets=n_targets)
    stream.prepare_for_use()

    learner = StackedSingleTargetHoeffdingTreeRegressor(
        leaf_prediction='adaptive', random_state=1)

    # Row 0 is never written (the first sample is skipped) and stays zero.
    y_pred = np.zeros((n_rows, n_targets))
    y_true = np.zeros((n_rows, n_targets))

    for seen in range(max_samples):
        X, y = stream.next_sample()
        # Evaluate before learning from the sample.
        if seen != 0 and seen % wait_samples == 0:
            row = seen // wait_samples
            y_pred[row, :] = learner.predict(X)
            y_true[row, :] = y
        learner.partial_fit(X, y)

    reference_path = os.path.join(
        test_path,
        'expected_preds_stacked_single_target_hoeffding_tree_adaptive.npy')
    expected_predictions = np.load(reference_path)
    assert np.allclose(y_pred, expected_predictions)

    expected_error = 150.7836894811965
    assert np.isclose(mean_absolute_error(y_true, y_pred), expected_error)

    expected_info = "StackedSingleTargetHoeffdingTreeRegressor(binary_split=False, grace_period=200,\n" \
                    "                                          leaf_prediction='adaptive',\n" \
                    "                                          learning_ratio_const=True,\n" \
                    "                                          learning_ratio_decay=0.001,\n" \
                    "                                          learning_ratio_perceptron=0.02,\n" \
                    "                                          max_byte_size=33554432,\n" \
                    "                                          memory_estimate_period=1000000,\n" \
                    "                                          nb_threshold=0, no_preprune=False,\n" \
                    "                                          nominal_attributes=None,\n" \
                    "                                          random_state=1,\n" \
                    "                                          remove_poor_atts=False,\n" \
                    "                                          split_confidence=1e-07,\n" \
                    "                                          stop_mem_management=False,\n" \
                    "                                          tie_threshold=0.05)"

    assert learner.get_info() == expected_info
    assert isinstance(learner.get_model_description(), str)
예제 #9
0
def test_multi_output_learner_regressor():
    """Regression test for ``MultiOutputLearner`` wrapping an ``SGDRegressor``.

    Pre-trains on the first 150 samples of a seeded two-target stream, then
    processes 5000 more samples one at a time in test-then-train order,
    predicting every 100th sample (except the first) before learning it.
    The mean absolute error over those predictions is compared with a stored
    value. The test also checks the estimator type, the return type of
    ``predict`` and that ``predict_proba`` raises ``AttributeError``.
    """
    stream = RegressionGenerator(n_samples=5500,
                                 n_features=10,
                                 n_informative=20,
                                 n_targets=2,
                                 random_state=1)
    stream.prepare_for_use()

    # NOTE(review): newer scikit-learn releases are understood to name this
    # loss 'squared_error'; confirm against the pinned version before
    # changing it.
    estimator = SGDRegressor(random_state=112,
                             tol=1e-3,
                             max_iter=10,
                             loss='squared_loss')
    learner = MultiOutputLearner(base_estimator=estimator)

    # Warm-up batch so the learner is fitted before the first prediction.
    X, y = stream.next_sample(150)
    learner.partial_fit(X, y)

    max_samples = 5000
    predictions = []
    true_targets = []
    wait_samples = 100

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples, always before training on the sample.
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            true_targets.append(y[0])

        learner.partial_fit(X, y)

    expected_performance = 2.444365309339395
    performance = mean_absolute_error(true_targets, predictions)
    assert np.isclose(performance, expected_performance)

    assert learner._estimator_type == "regressor"
    # X still holds the last sample drawn in the loop above.
    assert isinstance(learner.predict(X), np.ndarray)

    with pytest.raises(AttributeError):
        learner.predict_proba(X)
def test_multi_target_regression_hoeffding_tree_mean(test_path):
    """Regression test for ``MultiTargetRegressionHoeffdingTree``, mean leaves.

    Streams 500 seeded samples (3 targets) in test-then-train order and
    records a prediction every 10 samples. The recorded predictions are
    compared with a stored ``.npy`` file under ``test_path``; the mean
    absolute error and the exact ``get_info()`` text are checked as well.
    """
    n_targets = 3
    max_samples = 500
    wait_samples = 10
    n_rows = max_samples // wait_samples

    stream = RegressionGenerator(n_samples=500,
                                 n_features=20,
                                 n_informative=15,
                                 random_state=1,
                                 n_targets=n_targets)
    stream.prepare_for_use()

    learner = MultiTargetRegressionHoeffdingTree(leaf_prediction='mean')

    # Row 0 is never written (the first sample is skipped) and stays zero.
    y_pred = np.zeros((n_rows, n_targets))
    y_true = np.zeros((n_rows, n_targets))

    for seen in range(max_samples):
        X, y = stream.next_sample()
        # Evaluate before learning from the sample.
        if seen != 0 and seen % wait_samples == 0:
            row = seen // wait_samples
            y_pred[row, :] = learner.predict(X)
            y_true[row, :] = y
        learner.partial_fit(X, y)

    reference_path = os.path.join(
        test_path, 'expected_preds_multi_target_regression_mean.npy')
    expected_predictions = np.load(reference_path)
    assert np.allclose(y_pred, expected_predictions)

    expected_error = 167.40626294018753
    assert np.isclose(mean_absolute_error(y_true, y_pred), expected_error)

    expected_info = (
        'MultiTargetRegressionHoeffdingTree: max_byte_size: 33554432 - '
        'memory_estimate_period: 1000000 - grace_period: 200 - '
        'split_criterion: intra cluster variance reduction - '
        'split_confidence: 1e-07 - tie_threshold: 0.05 - binary_split: False'
        ' - stop_mem_management: False - remove_poor_atts: False '
        '- no_pre_prune: False - leaf_prediction: mean - nb_threshold: 0 - '
        'nominal_attributes: [] - '
    )
    assert learner.get_info() == expected_info
    assert isinstance(learner.get_model_description(), str)
예제 #11
0
def test_isoup_tree_mean(test_path):
    """Regression test for ``iSOUPTreeRegressor`` with mean leaves.

    Streams 2000 seeded samples (3 targets) in test-then-train order and
    records a prediction every 200 samples. The recorded predictions are
    compared with a stored ``.npy`` file under ``test_path``. The mean
    absolute error, the whitespace-normalised ``get_info()`` string and the
    return type of ``predict`` are checked as well.
    """
    stream = RegressionGenerator(n_samples=2000,
                                 n_features=20,
                                 n_informative=15,
                                 random_state=1,
                                 n_targets=3)
    stream.prepare_for_use()

    learner = iSOUPTreeRegressor(leaf_prediction='mean')

    max_samples = 2000
    wait_samples = 200
    # Integer division keeps the row count and row indices as exact ints.
    n_rows = max_samples // wait_samples
    # Row 0 is never written (the first sample is skipped) and stays zero.
    y_pred = np.zeros((n_rows, 3))
    y_true = np.zeros((n_rows, 3))

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples, always before training on the sample.
        if cnt != 0 and cnt % wait_samples == 0:
            row = cnt // wait_samples
            y_pred[row, :] = learner.predict(X)
            y_true[row, :] = y
        learner.partial_fit(X, y)

    # NOTE(review): the file name mentions "multi_target_regression_mean",
    # not iSOUP; confirm it holds the (10, 3) references for this test.
    test_file = os.path.join(
        test_path, 'expected_preds_multi_target_regression_mean.npy')
    expected_predictions = np.load(test_file)

    assert np.allclose(y_pred, expected_predictions)

    error = mean_absolute_error(y_true, y_pred)
    expected_error = 191.2823924547882
    assert np.isclose(error, expected_error)

    expected_info = "iSOUPTreeRegressor(binary_split=False, grace_period=200, leaf_prediction='mean', " \
                    "learning_ratio_const=True, learning_ratio_decay=0.001, learning_ratio_perceptron=0.02, " \
                    "max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0, no_preprune=False, " \
                    "nominal_attributes=None, random_state=None, remove_poor_atts=False, split_confidence=1e-07, " \
                    "stop_mem_management=False, tie_threshold=0.05)"
    # str.split() already drops surrounding whitespace, so joining its pieces
    # collapses every whitespace run (including line breaks) to one space.
    info = " ".join(learner.get_info().split())
    assert info == expected_info

    # X still holds the last sample drawn in the loop above.
    assert isinstance(learner.predict(X), np.ndarray)
예제 #12
0
def test_regression_hoeffding_tree_model_description():
    """Check the textual model description of a mean-leaf regression tree.

    Fits a ``RegressionHoeffdingTree`` on one batch of 500 seeded samples and
    requires its ``get_model_description()`` output to be at least 90%
    similar (``SequenceMatcher`` ratio) to a stored reference.
    """
    batch_size = 500

    generator = RegressionGenerator(n_samples=500,
                                    n_features=20,
                                    n_informative=15,
                                    random_state=1)
    generator.prepare_for_use()

    tree = RegressionHoeffdingTree(leaf_prediction='mean')
    features, targets = generator.next_sample(batch_size)
    tree.partial_fit(features, targets)

    # Reference description: a single split on attribute 6 with two leaves.
    reference = "".join([
        "if Attribute 6 <= 0.1394515530995348:\n",
        "  Leaf = Statistics {0: 276.0000, 1: -21537.4157, 2: 11399392.2187}\n",
        "if Attribute 6 > 0.1394515530995348:\n",
        "  Leaf = Statistics {0: 224.0000, 1: 22964.8868, 2: 10433581.2534}\n",
    ])

    matcher = SequenceMatcher(None, reference, tree.get_model_description())
    similarity = matcher.ratio()
    assert similarity > 0.9
예제 #13
0
def test_evaluate_regression_coverage(tmpdir):
    """Smoke-test prequential evaluation of a single-target regressor.

    A simple coverage test: it runs ``EvaluatePrequential`` over 1000
    generated samples with a default ``RegressionHoeffdingTree`` and writes
    the summary CSV under ``tmpdir``. Nothing is asserted here; metric tests
    are placed in the corresponding test module.
    """
    from skmultiflow.data import RegressionGenerator
    from skmultiflow.trees import RegressionHoeffdingTree

    n_samples = 1000
    summary_path = os.path.join(str(tmpdir), "prequential_summary.csv")

    data_stream = RegressionGenerator(n_samples=n_samples)
    data_stream.prepare_for_use()

    model = RegressionHoeffdingTree()

    evaluator = EvaluatePrequential(
        max_samples=n_samples,
        metrics=['mean_square_error', 'mean_absolute_error'],
        output_file=summary_path,
    )
    evaluator.evaluate(stream=data_stream, model=model, model_names=['HTR'])