예제 #1
0
def test_nans_filled_with_zero():
    """
    Check that a NaN entry in the input comes back as zero in the column's
    stored array
    """
    raw = np.array([np.nan, 1., 2., 3.])
    expected = np.array([0., 1., 2., 3.])
    column = CHAID.ContinuousColumn(raw)
    assert (column.arr == expected).all()
예제 #2
0
def test_chaid_vector_with_dtype_object():
    """
    Check that a ValueError with the expected message is raised when an
    object-dtype column is supplied
    """
    with pytest.raises(ValueError) as excinfo:
        CHAID.ContinuousColumn(np.array(['2', '4'], dtype="object"))
    # The message check must live outside the `with` block: anything placed
    # after the raising call inside the block is never executed. `str()` is
    # used because Python 3 exceptions have no `.message` attribute.
    assert str(excinfo.value) == 'Must only pass numerical values to create continuous column'
예제 #3
0
def test_chaid_vector_converts_strings():
    """
    Check that a ValueError with the expected message is raised when a
    string column is supplied
    """
    with pytest.raises(ValueError) as excinfo:
        CHAID.ContinuousColumn(np.array(['2', '4']))
    # The message check must live outside the `with` block: anything placed
    # after the raising call inside the block is never executed. `str()` is
    # used because Python 3 exceptions have no `.message` attribute.
    assert str(excinfo.value) == 'Must only pass numerical values to create continuous column'
예제 #4
0
def test_dependent_variable_properties_as_members_for_continous_node():
    """
    Check that a node built on a continuous column reports the mean and
    standard deviation of the data as its members
    """
    values = np.array([1.3, 23.0, 3.0, 3.0, 12.4])
    node = CHAID.Node(dep_v=CHAID.ContinuousColumn(values))
    expected_members = {'mean': values.mean(), 's.t.d': values.std()}
    assert node.members == expected_members
예제 #5
0
def test_node_should_have_a_score():
    """
    Check that a node exposes the score of its split under the name 'score'
    """
    dependent = CHAID.ContinuousColumn(np.array([1.3, 23.0, 3.0, 3.0, 12.4]))
    node = CHAID.Node(dep_v=dependent, split=CHAID.Split("a", [], 2, 3, 4))
    assert node.score == 2
예제 #6
0
 def test_p_and_chi_values_for_normal_data(self):
     """
     Compare the best continuous split on the normal data with the
     hand-calculated score, p value and degrees of freedom
     """
     dependent = CHAID.ContinuousColumn(self.normal_arr)
     best = self.stats_normal_data.best_con_split(self.ndarr, dependent)
     assert round(best.score, 4) == 2.7346
     assert round(best.p, 4) == 0.0982
     assert best.dof == 118.0
예제 #7
0
 def test_p_and_chi_values_for_random_data(self):
     """
     Compare the best continuous split on the random data with the
     hand-calculated score, p value and degrees of freedom
     """
     dependent = CHAID.ContinuousColumn(self.random_arr)
     best = self.stats_random_data.best_con_split(self.ndarr, dependent)
     assert round(best.score, 4) == 1.0588
     assert round(best.p, 4) == 0.3056
     assert best.dof == 118.0
예제 #8
0
def test_column_stores_weights():
    """
    Check that nominal, ordinal and continuous columns each keep the weights
    they were constructed with
    """
    arr = np.array([1.0, 2.0, 3.0])
    wt = np.array([2.0, 1.0, 0.25])
    column_types = (
        CHAID.NominalColumn,
        CHAID.OrdinalColumn,
        CHAID.ContinuousColumn,
    )
    # Build every column before asserting anything, so a constructor failure
    # surfaces ahead of any weight comparison.
    columns = [column_type(arr, weights=wt) for column_type in column_types]
    for column in columns:
        assert (column.weights == wt).all()