def test_nans_filled_with_zero():
    """
    A continuous column built from data containing NaN should expose an
    array in which the NaN entries have been replaced by zero
    """
    raw_values = np.array([np.nan, 1., 2., 3.])
    expected = np.array([0., 1., 2., 3.])
    column = CHAID.ContinuousColumn(raw_values)
    # Element-wise comparison; every position must match.
    assert (column.arr == expected).all()
def test_chaid_vector_with_dtype_object():
    """
    Check that a ValueError with the expected message is raised when an
    object-dtype array is used to build a continuous column
    """
    with pytest.raises(ValueError) as excinfo:
        CHAID.ContinuousColumn(np.array(['2', '4'], dtype="object"))
    # Checked after the `with` block: code placed after the raising call
    # inside the block would never execute. `str(exc)` is used because
    # Python 3 exceptions have no `.message` attribute.
    assert str(excinfo.value) == 'Must only pass numerical values to create continuous column'
def test_chaid_vector_converts_strings():
    """
    Check that a ValueError with the expected message is raised when a
    string array is used to build a continuous column
    """
    with pytest.raises(ValueError) as excinfo:
        CHAID.ContinuousColumn(np.array(['2', '4']))
    # Checked after the `with` block: code placed after the raising call
    # inside the block would never execute. `str(exc)` is used because
    # Python 3 exceptions have no `.message` attribute.
    assert str(excinfo.value) == 'Must only pass numerical values to create continuous column'
def test_dependent_variable_properties_as_members_for_continous_node():
    """
    Tests that a node built on a continuous column reports the mean and
    standard deviation of the data as its members
    """
    values = np.array([1.3, 23.0, 3.0, 3.0, 12.4])
    node = CHAID.Node(dep_v=CHAID.ContinuousColumn(values))
    expected_members = {'mean': values.mean(), 's.t.d': values.std()}
    assert node.members == expected_members
def test_node_should_have_a_score():
    """
    Tests that the node exposes the score of the split it was given under
    the attribute name 'score'
    """
    values = np.array([1.3, 23.0, 3.0, 3.0, 12.4])
    dependent = CHAID.ContinuousColumn(values)
    # The third positional argument (2) is the value expected back as the score.
    node_split = CHAID.Split("a", [], 2, 3, 4)
    scored_node = CHAID.Node(dep_v=dependent, split=node_split)
    assert scored_node.score == 2
def test_p_and_chi_values_for_normal_data(self):
    """
    Check the score, p value and dof of the best continuous split on the
    normal data against hand calculated values
    """
    dependent = CHAID.ContinuousColumn(self.normal_arr)
    best = self.stats_normal_data.best_con_split(self.ndarr, dependent)
    # Score and p value are compared to four decimal places.
    assert round(best.score, 4) == 2.7346
    assert round(best.p, 4) == 0.0982
    assert best.dof == 118.
def test_p_and_chi_values_for_random_data(self):
    """
    Check the score, p value and dof of the best continuous split on the
    random data against hand calculated values
    """
    dependent = CHAID.ContinuousColumn(self.random_arr)
    best = self.stats_random_data.best_con_split(self.ndarr, dependent)
    # Score and p value are compared to four decimal places.
    assert round(best.score, 4) == 1.0588
    assert round(best.p, 4) == 0.3056
    assert best.dof == 118.
def test_column_stores_weights():
    """
    Tests that nominal, ordinal and continuous columns all keep the weights
    passed to their constructors
    """
    values = np.array([1.0, 2.0, 3.0])
    weights = np.array([2.0, 1.0, 0.25])
    column_types = (
        CHAID.NominalColumn,
        CHAID.OrdinalColumn,
        CHAID.ContinuousColumn,
    )
    # Build every column first, then verify each one in the same order.
    columns = [column_type(values, weights=weights) for column_type in column_types]
    for column in columns:
        assert (column.weights == weights).all()