コード例 #1
0
def test_kdd_tree_mixed():
    """Check KDTree nearest-neighbour queries with the 'mixed' metric.

    Builds a tree from 1000 samples drawn from a seeded RandomTreeGenerator
    stream (created with ``n_num_features=0``), queries the 4 nearest
    neighbours of 10 further samples, and compares the returned indices and
    distances, the ``get_info()`` string and the class type against
    hard-coded expected values.
    """
    stream = RandomTreeGenerator(tree_random_state=1,
                                 sample_random_state=1,
                                 n_num_features=0)
    stream.prepare_for_use()

    X, _ = stream.next_sample(1000)
    X_test, _ = stream.next_sample(10)

    # Build tree
    # NOTE(review): indices 0..24 are declared categorical; presumably this
    # covers every feature the stream emits -- confirm against the generator.
    cat_features = list(range(25))
    kdtree = KDTree(X,
                    metric='mixed',
                    return_distance=True,
                    categorical_list=cat_features)

    # Query tree: 4 neighbours for each of the 10 test samples
    dist, idx = kdtree.query(X_test, 4)

    expected_idx = [[123, 234, 707, 654], [688, 429, 216, 627],
                    [463, 970, 566, 399], [18, 895, 640, 996],
                    [396, 612, 897, 232], [328, 54, 138, 569],
                    [253, 501, 82, 273], [38, 146, 752, 923],
                    [946, 808, 271, 363], [951, 111, 708, 5]]
    expected_dist = [[2, 2, 2, 2], [2, 2, 2, 2], [2, 2, 2, 2], [2, 2, 2, 0],
                     [2, 2, 2, 0], [2, 2, 2, 0], [2, 2, 2, 2], [2, 2, 0, 0],
                     [2, 2, 2, 0], [2, 2, 2, 2]]
    # np.alltrue was removed in NumPy 2.0; array_equal also returns False
    # (instead of depending on broadcasting) when the shapes differ.
    assert np.array_equal(idx, expected_idx)

    assert np.allclose(dist, expected_dist)

    expected_info = 'KDTree: - leaf_size: 40 - metric: mixed - return_distance: True'
    assert kdtree.get_info() == expected_info

    assert kdtree.get_class_type() == 'data_structure'
コード例 #2
0
def test_kdd_tree_mixed():
    """Check KDTree nearest-neighbour queries with the 'mixed' metric.

    Builds a tree from 1000 samples drawn from a seeded RandomTreeGenerator
    stream (created with ``n_num_features=0``), queries the 4 nearest
    neighbours of 10 further samples, and compares the returned indices and
    distances, the ``get_info()`` string and ``_estimator_type`` against
    hard-coded expected values.
    """
    stream = RandomTreeGenerator(tree_random_state=1, sample_random_state=1, n_num_features=0)

    X, _ = stream.next_sample(1000)
    X_test, _ = stream.next_sample(10)

    # Build tree
    # NOTE(review): indices 0..24 are declared categorical; presumably this
    # covers every feature the stream emits -- confirm against the generator.
    cat_features = list(range(25))
    kdtree = KDTree(X, metric='mixed', return_distance=True, categorical_list=cat_features)

    # Query tree: 4 neighbours for each of the 10 test samples
    dist, idx = kdtree.query(X_test, 4)

    expected_idx = [[123, 234, 707, 654],
                    [688, 429, 216, 627],
                    [463, 970, 566, 399],
                    [18, 895, 640, 996],
                    [396, 612, 897, 232],
                    [328, 54, 138, 569],
                    [253, 501, 82, 273],
                    [38, 146, 752, 923],
                    [946, 808, 271, 363],
                    [951, 111, 708, 5]]
    expected_dist = [[2, 2, 2, 2],
                     [2, 2, 2, 2],
                     [2, 2, 2, 2],
                     [2, 2, 2, 0],
                     [2, 2, 2, 0],
                     [2, 2, 2, 0],
                     [2, 2, 2, 2],
                     [2, 2, 0, 0],
                     [2, 2, 2, 0],
                     [2, 2, 2, 2]]
    # np.alltrue was removed in NumPy 2.0; array_equal also returns False
    # (instead of depending on broadcasting) when the shapes differ.
    assert np.array_equal(idx, expected_idx)

    assert np.allclose(dist, expected_dist)

    # Adjacent literals are concatenated at compile time; the resulting
    # string is unchanged, only the backslash continuations are gone.
    expected_info = ('KDTree(categorical_list=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, '
                     '11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24], '
                     'leaf_size=40, metric=mixed, return_distance=True)')
    assert kdtree.get_info() == expected_info

    assert kdtree._estimator_type == 'data_structure'
コード例 #3
0
def test_kdd_tree_euclidean():
    """Check KDTree nearest-neighbour queries with the 'euclidean' metric.

    Builds a tree from 1000 samples drawn from a seeded RandomTreeGenerator
    stream, queries the 4 nearest neighbours of 10 further samples, and
    compares the returned indices and distances, the ``get_info()`` string
    and ``_estimator_type`` against hard-coded expected values.
    """
    stream = RandomTreeGenerator(tree_random_state=1, sample_random_state=1)

    X, _ = stream.next_sample(1000)
    X_test, _ = stream.next_sample(10)

    # Build tree
    kdtree = KDTree(X, metric='euclidean', return_distance=True)

    # Query tree: 4 neighbours for each of the 10 test samples
    dist, idx = kdtree.query(X_test, 4)

    expected_idx = [[855, 466, 348, 996],
                    [829, 654, 92, 333],
                    [227, 364, 183, 325],
                    [439, 482, 817, 501],
                    [886, 173, 279, 470],
                    [98, 30, 34, 580],
                    [959, 773, 374, 819],
                    [819, 685, 59, 992],
                    [624, 665, 209, 239],
                    [524, 807, 506, 191]]
    expected_dist = [[1.6366216258724973, 1.631437068636607, 1.5408182139320563, 1.4836054196064452],
                     [1.7839579422032452, 1.7694587302438618, 1.5339920309706585, 1.5228981881653287],
                     [1.6512443805072872, 1.637456923425164, 1.61736766513639, 1.5776532815820448],
                     [1.5843121606184263, 1.571918014408251, 1.5038147281265382, 0.7058569455034059],
                     [2.052148026638031, 2.0157953468214007, 1.8012794130725434, 1.6572756455115591],
                     [1.5844032729792423, 1.5688736638121885, 1.55893121879858, 1.4609657517960262],
                     [1.6819916227667229, 1.6186557774269037, 1.5815309744477162, 1.5720184136312232],
                     [1.7302164693989817, 1.5964713159009083, 1.4897849225874815, 1.1629448414734906],
                     [1.6511813695220574, 1.6454651930288255, 1.5926685577827064, 1.4973008307362947],
                     [1.5982346741983797, 1.5875900895982191, 1.4702209684850878, 1.4676217546305874]]

    # np.alltrue was removed in NumPy 2.0; array_equal also returns False
    # (instead of depending on broadcasting) when the shapes differ.
    assert np.array_equal(idx, expected_idx)

    # Distances are floats, so compare with a tolerance rather than exactly.
    assert np.allclose(dist, expected_dist)

    expected_info = 'KDTree(categorical_list=None, leaf_size=40, metric=euclidean, return_distance=True)'
    assert kdtree.get_info() == expected_info

    assert kdtree._estimator_type == 'data_structure'