Python CHAID.Tree Examples

Programming Language: Python

Namespace/Package Name: setup_tests

Class/Type: CHAID

Method/Function: Tree

Examples at hotexamples.com: 12

Python CHAID.Tree - 12 examples found. These are the top-rated real-world Python examples of setup_tests.CHAID.Tree, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

Tree(12)

NominalColumn(11)

ContinuousColumn(8)

OrdinalColumn(5)

Split(5)

Node(3)

Stats(1)

Example #1

Show file

def test_best_split_with_combination():
    """
    Exercise generate_best_split on perfectly splitting data (described by
    the original author as needing a single category merge).

    Snapshots of both input arrays are taken before the tree is built so
    the test can also confirm that CHAID leaves the caller's arrays alone.
    """
    no_side_effects = (
        'Calling chaid should have no side affects for original numpy arrays'
    )

    dependent = np.array([1] * 5 + [2] * 10)
    dependent_before = dependent.copy()
    flat_rows = [1, 2, 3] * 5 + [2, 2, 3] * 5 + [3, 2, 3] * 5
    independent = np.array(flat_rows).reshape(15, 3)
    independent_before = independent.copy()

    tree = CHAID.Tree(independent, dependent)
    best = tree.generate_best_split(tree.vectorised_array, tree.observed)

    # Inputs must still match the snapshots taken before the tree was built.
    assert list_ordered_equal(independent, independent_before), no_side_effects
    assert list_ordered_equal(dependent, dependent_before), no_side_effects

    # Only the first column varies across rows, so it must be the one chosen.
    assert best.column_id == 0, 'Identifies correct column to split on'
    expected_groups = [[1], [2], [3]]
    assert list_unordered_equal(
        best.split_map, expected_groups), 'Correctly identifies catagories'
    assert list_unordered_equal(
        best.surrogates, []), 'No surrogates should be generated'
    assert best.p < 0.015

Example #2

Show file

def test_p_and_chi_values():
    """
    Compare the chi and p values of the best split with hand-computed
    figures
    """
    predictor = np.array([1] * 4 + [2] * 3).reshape(7, 1)
    response = np.array([1] * 3 + [2] * 4)

    tree = CHAID.Tree(predictor, response, split_threshold=0.9)
    best = tree.generate_best_split(tree.vectorised_array, tree.observed)

    # Both statistics are compared after rounding to four decimal places.
    chi_rounded = round(best.chi, 4)
    assert chi_rounded == 3.9375
    p_rounded = round(best.p, 4)
    assert p_rounded == 0.0472

Example #3

Show file

 def test_min_child_node_size_does_not_stop_for_unweighted_case(self):
     """
     Build an unweighted tree with min_child_node_size=10 and check that
     the tree store ends up with exactly four entries
     """
     options = dict(alpha_merge=0.999, max_depth=5, min_child_node_size=10)
     tree = CHAID.Tree(self.ndarr, self.arr, **options)
     tree.build_tree()

     stored = len(tree.tree_store)
     assert stored == 4

Example #4

Show file

    def test_surrogate_default_min_p(self):
        """
        By default the chosen split should be the one with the lowest p,
        even though its chi is higher than that of the first surrogate
        """
        tree = CHAID.Tree(self.ndarr, self.arr, split_threshold=0.9)

        # The third positional argument is explicitly None.
        call_args = (tree.vectorised_array, tree.observed, None)
        best = tree.generate_best_split(*call_args)

        min_p_msg = 'The best split should be the minimum p by default'
        chi_msg = ('The data picked should not allow picked split to have '
                   'both p and chi less than the surrogate')

        assert best.p < best.surrogates[0].p, min_p_msg
        assert best.chi > best.surrogates[0].chi, chi_msg

Example #5

Show file

    def test_surrgate_detection(self):
        """
        Feed in data for which a surrogate split exists and check that it
        is reported alongside the best split
        """
        tree = CHAID.Tree(self.ndarr, self.arr, split_threshold=0.9)

        # The third positional argument is explicitly None.
        call_args = (tree.vectorised_array, tree.observed, None)
        best = tree.generate_best_split(*call_args)

        assert best.column_id == 1, 'The best split should be on column 1'

        surrogate_count = len(best.surrogates)
        assert surrogate_count == 1, \
            'There should be a single surrogate in given data'

        first_surrogate = best.surrogates[0]
        assert first_surrogate.column_id == 0, \
            'The surrogate should be on column 0'

Example #6

Show file

 def test_incorrect_weighted_counts(self):
     """
     Regression test for the bug where node() used the class-level weights
     rather than the weights sliced for that node
     """
     options = dict(
         alpha_merge=0.999,
         weights=self.wt,
         max_depth=5,
         min_parent_node_size=2,
     )
     tree = CHAID.Tree(self.ndarr, self.arr, **options)
     tree.build_tree()

     # (index into tree_store, expected weighted member counts)
     expectations = (
         (3, {1: 0, 2: 1.2}),
         (5, {1: 5.0, 2: 6.0}),
     )
     for position, expected_members in expectations:
         assert tree.tree_store[position].members == expected_members

Example #7

Show file

File: test_tree.py Project: tongliuliu/CHAID

def test_new_columns_constructor():
    """
    Exercise the Tree constructor that accepts CHAID column objects
    instead of raw arrays
    """
    orientation = np.array(
        [0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1])
    age = np.array(
        [0, 1, 1, 0, 2, 2, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    income = np.array(
        [0, 0, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])

    # The same label mapping is shared by both ordinal predictors.
    labels = {0: '0-5', 1: '6-10', 2: '11-15'}
    named_predictors = (("orientation", orientation), ("age", age))
    predictors = [
        CHAID.OrdinalColumn(values, name=column_name, metadata=labels)
        for column_name, values in named_predictors
    ]
    response = CHAID.NominalColumn(income)

    tree = CHAID.Tree(predictors, response, {'min_child_node_size': 1})

    root_split = tree.tree_store[0].split
    assert root_split.groupings == "[['0-5'], ['6-10', '11-15']]"

Example #8

Show file

def test_correct_dof():
    """
    Verify that the split reports (levels - 1) * (levels - 1) degrees of
    freedom for the predictor and the dependent variable
    """
    gender_values = [0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2]
    income_values = [0, 0, 1, 0, 2, 0, 1, 2, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
    gender = np.array(gender_values)
    income = np.array(income_values)

    # Turn the single predictor into a one-column 2-D array.
    single_column = np.vstack([gender]).T

    tree = CHAID.Tree(single_column, income, alpha_merge=0.9)
    best = tree.generate_best_split(tree.vectorised_array, tree.observed)

    gender_levels = len(set(gender))
    income_levels = len(set(income))
    expected_dof = (gender_levels - 1) * (income_levels - 1)
    assert best.dof == expected_dof

Example #9

Show file

def test_spliting_identical_values():
    """
    Test that passing in identical data cannot be split.

    The dependent array contains only the value 1, so the best split
    returned by the tree is expected to be invalid. Copies of both input
    arrays are taken up front so the test can also confirm that CHAID does
    not modify the arrays it is given.
    """
    arr = np.array(([1] * 5) + ([1] * 5))
    orig_arr = arr.copy()
    ndarr = np.array(([1, 2, 3] * 5) + ([2, 2, 3] * 5)).reshape(10, 3)
    orig_ndarr = ndarr.copy()
    tree = CHAID.Tree(ndarr, arr)

    split = tree.generate_best_split(tree.vectorised_array, tree.observed)
    assert list_ordered_equal(ndarr, orig_ndarr), \
        'Calling chaid should have no side affects for original numpy arrays'
    # This is the same no-mutation check for the dependent array, so it
    # reports the same message (it previously carried an unrelated,
    # copy-pasted "correct column" message).
    assert list_ordered_equal(arr, orig_arr), \
        'Calling chaid should have no side affects for original numpy arrays'
    assert not split.valid(), \
        'Should not be able to split data with no skew'

Example #10

Show file

def test_p_and_chi_values_when_weighting_applied():
    """
    Verify the chi and p values of the best split when per-row weights
    are provided
    """
    gender = np.array([0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1])
    income = np.array([0, 0, 1, 0, 2, 0, 1, 2, 1, 0, 1])
    row_weights = np.array(
        [0.9, 0.8, 0.9, 1.1, 1.2, 0.8, 1.3, 0.2, 0.5, 0.7, 1.1])

    # Turn the single predictor into a one-column 2-D array.
    single_column = np.vstack([gender]).T

    tree = CHAID.Tree(
        single_column, income, alpha_merge=0.9, weights=row_weights)

    # The weights are passed to the constructor and to the split call.
    best = tree.generate_best_split(
        tree.vectorised_array, tree.observed, row_weights)

    chi_rounded = round(best.chi, 4)
    assert chi_rounded == 1.6179
    p_rounded = round(best.p, 4)
    assert p_rounded == 0.4453

Example #11

Show file

def test_zero_subbed_weighted_ndarry():
    """
    Weighted split where some predictor/dependent combinations never occur.

    Every row with gender 2 has income 0, so the other income values have
    no observations for that category. Per the original author's note, a
    very small float is assigned to the 0 value in this case.
    """
    gender = np.array(
        [0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1])
    income = np.array(
        [0, 0, 1, 1, 2, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])

    # First half of the rows weigh 0.9, the second half 1.9.
    half = len(gender) // 2
    row_weights = np.array([0.9] * half + [1.9] * half)

    single_column = np.vstack([gender]).T

    tree = CHAID.Tree(
        single_column, income, alpha_merge=0.9, weights=row_weights)
    best = tree.generate_best_split(
        tree.vectorised_array, tree.observed, row_weights)

    chi_rounded = round(best.chi, 4)
    assert chi_rounded == 14.5103
    p_rounded = round(best.p, 4)
    assert p_rounded == 0.0007

Example #12

Show file

 def setUp(self):
     """ Create the tree fixture shared by the tree generation tests """
     dependent = np.array([1] * 5 + [2] * 5)
     flat_rows = [1, 2, 3] * 5 + [2, 2, 3] * 5
     independent = np.array(flat_rows).reshape(10, 3)
     self.tree = CHAID.Tree(independent, dependent)