Esempio n. 1
0
def test_best_split_with_combination_combining_if_too_small():
    """
    Check the best split when categories have to be merged.

    The tree is built with min_child_node_size=5 and alpha_merge=0.055, and
    the expected split map on column 0 groups categories 2 and 3 together.
    """
    untouched_msg = (
        'Calling chaid should have no side affects for original numpy arrays'
    )

    dependent = np.array([1] * 5 + [2] * 10)
    dependent_snapshot = dependent.copy()

    # 15 rows of 3 predictors, written flat and reshaped afterwards
    flat_predictors = (
        [1, 2, 3] * 5 + [2, 2, 3] * 3 + [3, 2, 3] * 5 + [1, 2, 3] * 2
    )
    predictors = np.array(flat_predictors).reshape(15, 3)
    predictors_snapshot = predictors.copy()

    tree = CHAID.Tree.from_numpy(
        predictors, dependent, min_child_node_size=5, alpha_merge=0.055
    )
    best = tree.generate_best_split(tree.vectorised_array, tree.observed)

    # The caller's arrays must come back exactly as they went in
    assert list_ordered_equal(predictors, predictors_snapshot), untouched_msg
    assert list_ordered_equal(dependent, dependent_snapshot), untouched_msg

    assert best.column_id == 0, 'Identifies correct column to split on'
    expected_groups = [[1], [2, 3]]
    assert list_unordered_equal(
        best.split_map, expected_groups), 'Correctly identifies categories'
    assert list_unordered_equal(
        best.surrogates, []), 'No surrogates should be generated'
    assert best.p < 0.055
Esempio n. 2
0
def test_spliting_identical_values():
    """
    Test that passing in identical data cannot be split.

    The dependent array holds a single value (all ones), so the split
    returned by generate_best_split is expected to be invalid. The input
    arrays are also checked to be unmodified by the call.
    """
    no_side_effects = \
        'Calling chaid should have no side affects for original numpy arrays'

    arr = np.array(([1] * 5) + ([1] * 5))
    orig_arr = arr.copy()
    ndarr = np.array(([1, 2, 3] * 5) + ([2, 2, 3] * 5)).reshape(10, 3)
    orig_ndarr = ndarr.copy()
    tree = CHAID.Tree.from_numpy(ndarr, arr, min_child_node_size=0)

    split = tree.generate_best_split(tree.vectorised_array, tree.observed)
    assert list_ordered_equal(ndarr, orig_ndarr), no_side_effects
    # This check guards against mutation of the dependent array, so it must
    # report the side-effect message (it previously carried the unrelated
    # 'Identifies correct column to split on' text).
    assert list_ordered_equal(arr, orig_arr), no_side_effects
    assert not split.valid(), \
        'Should not be able to split data with no skew'
Esempio n. 3
0
def test_spliting_identical_values():
    """
    Test that passing in identical data cannot be split.

    The dependent array holds a single value (all ones), so the split
    returned by generate_best_split is expected to be invalid. The input
    arrays are also checked to be unmodified by the call.
    """
    no_side_effects = \
        'Calling chaid should have no side affects for original numpy arrays'

    arr = np.array(([1] * 5) + ([1] * 5))
    orig_arr = arr.copy()
    ndarr = np.array(([1, 2, 3] * 5) + ([2, 2, 3] * 5)).reshape(10, 3)
    orig_ndarr = ndarr.copy()
    tree = CHAID.Tree.from_numpy(ndarr, arr, min_child_node_size=0)

    split = tree.generate_best_split(
        tree.vectorised_array,
        tree.observed
    )
    assert list_ordered_equal(ndarr, orig_ndarr), no_side_effects
    # This check guards against mutation of the dependent array, so it must
    # report the side-effect message (it previously carried the unrelated
    # 'Identifies correct column to split on' text).
    assert list_ordered_equal(arr, orig_arr), no_side_effects
    assert not split.valid(), \
        'Should not be able to split data with no skew'
Esempio n. 4
0
def test_best_split_unique_values():
    """
    Check the best split when no categories need merging.

    The expected split is on column 0 with each category in its own group
    ([[1], [2]]), no surrogates, and a p-value below 0.015.
    """
    untouched_msg = 'Calling chaid should have no side affects for original numpy arrays'

    dependent = np.array([1] * 5 + [2] * 5)
    dependent_snapshot = dependent.copy()

    # 10 rows of 3 predictors, written flat and reshaped afterwards
    flat_predictors = [1, 2, 3] * 5 + [2, 2, 3] * 5
    predictors = np.array(flat_predictors).reshape(10, 3)
    predictors_snapshot = predictors.copy()

    tree = CHAID.Tree.from_numpy(predictors, dependent, min_child_node_size=0)
    best = tree.generate_best_split(tree.vectorised_array, tree.observed)

    # Predictors first, then the dependent variable: neither may be mutated
    input_pairs = (
        (predictors, predictors_snapshot),
        (dependent, dependent_snapshot),
    )
    for current, snapshot in input_pairs:
        assert list_ordered_equal(current, snapshot), untouched_msg

    assert best.column_id == 0, 'Identifies correct column to split on'
    expected_groups = [[1], [2]]
    assert list_unordered_equal(best.split_map, expected_groups), 'Correctly identifies catagories'
    assert list_unordered_equal(best.surrogates, []), 'No surrogates should be generated'
    assert best.p < 0.015
Esempio n. 5
0
def test_best_split_with_combination_combining_if_too_small():
    """
    Check the best split when categories have to be merged.

    The tree is built with min_child_node_size=5 and alpha_merge=0.055, and
    the expected split map on column 0 groups categories 2 and 3 together.
    """
    untouched_msg = 'Calling chaid should have no side affects for original numpy arrays'

    dependent = np.array([1] * 5 + [2] * 10)
    dependent_snapshot = dependent.copy()

    # 15 rows of 3 predictors, written flat and reshaped afterwards
    flat_predictors = [1, 2, 3] * 5 + [2, 2, 3] * 3 + [3, 2, 3] * 5 + [1, 2, 3] * 2
    predictors = np.array(flat_predictors).reshape(15, 3)
    predictors_snapshot = predictors.copy()

    tree = CHAID.Tree.from_numpy(
        predictors,
        dependent,
        min_child_node_size=5,
        alpha_merge=0.055,
    )
    best = tree.generate_best_split(tree.vectorised_array, tree.observed)

    # Predictors first, then the dependent variable: neither may be mutated
    input_pairs = (
        (predictors, predictors_snapshot),
        (dependent, dependent_snapshot),
    )
    for current, snapshot in input_pairs:
        assert list_ordered_equal(current, snapshot), untouched_msg

    assert best.column_id == 0, 'Identifies correct column to split on'
    expected_groups = [[1], [2, 3]]
    assert list_unordered_equal(best.split_map, expected_groups), 'Correctly identifies categories'
    assert list_unordered_equal(best.surrogates, []), 'No surrogates should be generated'
    assert best.p < 0.055
Esempio n. 6
0
def test_best_split_unique_values():
    """
    Check the best split when no categories need merging.

    The expected split is on column 0 with each category in its own group
    ([[1], [2]]), no surrogates, and a p-value below 0.015.
    """
    untouched_msg = 'Calling chaid should have no side affects for original numpy arrays'

    response = np.array([1] * 5 + [2] * 5)
    response_before = response.copy()

    # 10 rows of 3 predictors, written flat and reshaped afterwards
    flat_features = [1, 2, 3] * 5 + [2, 2, 3] * 5
    features = np.array(flat_features).reshape(10, 3)
    features_before = features.copy()

    tree = CHAID.Tree.from_numpy(features, response, min_child_node_size=0)
    chosen = tree.generate_best_split(tree.vectorised_array, tree.observed)

    # Neither input array may be mutated by the call
    assert list_ordered_equal(features, features_before), untouched_msg
    assert list_ordered_equal(response, response_before), untouched_msg

    assert chosen.column_id == 0, 'Identifies correct column to split on'
    groups_match = list_unordered_equal(chosen.split_map, [[1], [2]])
    assert groups_match, 'Correctly identifies catagories'
    no_surrogates = list_unordered_equal(chosen.surrogates, [])
    assert no_surrogates, 'No surrogates should be generated'
    assert chosen.p < 0.015
 def test_changing_copy(self):
     """ Mutating the copy must make it differ from the original """
     # Overwrite the first element of the copy's underlying array
     self.copy.arr[0] = 55.0
     vectors_match = list_ordered_equal(self.copy, self.orig)
     assert not vectors_match, 'Altering one vector should not affect the other'
 def test_deep_copy_does_copy(self):
     """ The copy must be a distinct object with equal contents """
     original, duplicate = self.orig, self.copy
     # Identity check: the two names must not refer to the same object
     assert original is not duplicate, 'The vector objects must be different'
     contents_match = list_ordered_equal(duplicate, original)
     assert contents_match, 'Vector contents must be the same'
Esempio n. 9
0
 def test_changing_copy(self):
     """ Mutating the copy must make it differ from the original """
     # Overwrite the first element of the copy's underlying array
     self.copy.arr[0] = 55.0
     vectors_match = list_ordered_equal(self.copy, self.orig)
     assert not vectors_match, 'Altering one vector should not affected the other'
Esempio n. 10
0
 def test_deep_copy_does_copy(self):
     """ The copy must be a distinct object with equal contents """
     original, duplicate = self.orig, self.copy
     # Identity check: the two names must not refer to the same object
     assert original is not duplicate, 'The vector objects must be different'
     contents_match = list_ordered_equal(duplicate, original)
     assert contents_match, 'Vector contents must be the same'