def test__chi_square__not_providing_data_fails(self):
    """
    A ValueError must be raised when either of the two samples is None.
    """
    # First pass: second argument is None; second pass: first argument is None.
    for none_goes_first in (False, True):
        with self.assertRaises(ValueError):
            sample = self.samples.temperature
            if none_goes_first:
                statx.chi_square(None, sample)
            else:
                statx.chi_square(sample, None)
def test__chi_square__computation_one_bin_not_present(self):
    """
    Check the p-value when one category is absent from one of the two samples.
    """
    labels = 'ABCDEF'
    expected_p_value = 0.94879980715092971

    # Test data: six categories, repeated a different number of times per sample.
    a = [label for label, count in zip(labels, (16, 18, 16, 14, 12, 12)) for _ in range(count)]
    b = [label for label, count in zip(labels, (16, 16, 16, 16, 16, 8)) for _ in range(count)]

    # Dropping the trailing 12 items removes every 'F' from a.
    a_without_f = a[:-12]
    self.assertAlmostEqual(expected_p_value, statx.chi_square(a_without_f, b)[0])

    # Dropping the trailing 8 items removes every 'F' from b.
    b_without_f = b[:-8]
    self.assertAlmostEqual(expected_p_value, statx.chi_square(a, b_without_f)[0])
def test__chi_square__computation_symmetric(self):
    """
    Check that swapping the two samples leaves the results roughly unchanged.
    """
    # Create test data:
    a = ['A'] * 16 + ['B'] * 18 + ['C'] * 16 + ['D'] * 14 + ['E'] * 12 + ['F'] * 12
    b = ['A'] * 16 + ['B'] * 16 + ['C'] * 16 + ['D'] * 16 + ['E'] * 16 + ['F'] * 8

    # Full samples in both orders. The results are compared element by element:
    # assertAlmostEqual subtracts its arguments whenever they are not exactly
    # equal, which raises TypeError for tuples instead of reporting a failure.
    forward = statx.chi_square(a, b)
    backward = statx.chi_square(b, a)
    self.assertEqual(len(forward), len(backward))
    for first, second in zip(forward, backward):
        self.assertAlmostEqual(first, second)

    # Same check with b shortened so that category 'F' is missing from it.
    aa = statx.chi_square(b[:-8], a)
    bb = statx.chi_square(a, b[:-8])
    self.assertAlmostEqual(aa[0], bb[0])  # p-value
    self.assertAlmostEqual(aa[1], bb[1])  # chi-square value
def run_goodness_of_fit_test(self, observed_freqs, expected_freqs, alpha=0.01, min_counts=5):
    """
    Checks the validity of observed and expected counts and runs chi-square test for goodness of fit.

    Categories whose observed frequency is below ``min_counts`` are left out of the test,
    together with their expected frequencies.

    :param observed_freqs: observed frequencies
    :type  observed_freqs: pd.Series
    :param expected_freqs: expected frequencies
    :type  expected_freqs: pd.Series
    :param alpha: significance level
    :type  alpha: float
    :param min_counts: minimum number of observations a category needs to take part in the test
    :type  min_counts: int

    :return: split_is_unbiased: False if split is biased and True if split is correct
             p_value: corresponding chi-square p-value
    :rtype:  bool, float

    :raises ValueError: if either input is not a pd.Series, if either input is empty, or if
                        the remaining observed and expected categories differ in number or
                        are fewer than two
    """
    if not (isinstance(observed_freqs, pd.Series) and isinstance(expected_freqs, pd.Series)):
        raise ValueError("Observed and expected frequencies should be of type Series.")
    if observed_freqs.empty or expected_freqs.empty:
        raise ValueError("Variant split check was cancelled since expected or observed frequencies are empty.")

    # Keep only categories with at least min_counts observations; a category sitting
    # exactly on the threshold is valid ("at least"), hence >= rather than >.
    valid_observed_freqs = observed_freqs[observed_freqs >= min_counts]
    # Restrict the expected frequencies to the categories that survived the filter.
    valid_expected_freqs = expected_freqs.filter(items=valid_observed_freqs.keys())

    categories_match = len(valid_observed_freqs) == len(valid_expected_freqs)
    if not categories_match or len(valid_observed_freqs) < 2:
        raise ValueError("Variant split check was cancelled since observed or expected frequencies "
                         "are less than 2.")

    # Sort both series by label so that the categories line up position by position.
    _, p_value = statx.chi_square(valid_observed_freqs.sort_index(), valid_expected_freqs.sort_index())
    split_is_unbiased = p_value >= alpha
    return split_is_unbiased, p_value
def test__chi_square__computation_same_data(self):
    """
    Comparing a sample against itself must yield a p-value of exactly 1.0.
    """
    result = statx.chi_square(self.samples.temperature, self.samples.temperature)
    # The p-value is the first element of the result.
    p_value = result[0]
    self.assertEqual(1.0, p_value)
def test__chi_square__computation_different_data(self):
    """
    Check the p-value for two samples with different category counts.
    """
    labels = 'ABCDEF'

    # Test data: six categories, repeated a different number of times per sample.
    a = [label for label, count in zip(labels, (16, 18, 16, 14, 12, 12)) for _ in range(count)]
    b = [label for label, count in zip(labels, (16, 16, 16, 16, 16, 8)) for _ in range(count)]

    # The p-value is the first element of the result.
    p_value = statx.chi_square(a, b)[0]
    self.assertAlmostEqual(0.89852623940266074, p_value)
def test__chi_square__computation_different_data_as_in_statistics_book(self):
    """
    Check the chi-square statistic and p-value against the example from the
    statistics book Fahrmeir et al. (2007) pp. 463.
    """
    counts_a = (('nein', 139), ('gut', 348), ('mittel', 213))
    counts_b = (('nein', 135), ('gut', 46), ('mittel', 119))

    # Expand the (label, count) pairs into flat lists of observations.
    a = [label for label, count in counts_a for _ in range(count)]
    b = [label for label, count in counts_b for _ in range(count)]

    # The result unpacks into three values; the third one is not checked here.
    p, chisq, _ = statx.chi_square(a, b)

    self.assertAlmostEqual(116.851, chisq, delta=0.001)
    self.assertAlmostEqual(0.0, p, delta=1e-11)
def test__chi_square__computation_different_data_as_in_open_statistics_book(self):
    """
    Check the chi-square statistic and p-value against the example from the
    open statistics book, 3rd ed., pp. 299
    (https://www.openintro.org/stat/textbook.php).
    """
    counts_a = (('cu', 3511), ('t1', 1749), ('t2', 1818))
    counts_b = (('cu', 1489), ('t1', 751), ('t2', 682))

    # Expand the (label, count) pairs into flat lists of observations.
    a = [label for label, count in counts_a for _ in range(count)]
    b = [label for label, count in counts_b for _ in range(count)]

    # The result unpacks into three values; the third one is not checked here.
    p, chisq, _ = statx.chi_square(a, b)

    self.assertAlmostEqual(6.120, chisq, delta=0.001)
    self.assertAlmostEqual(0.0469, p, delta=0.0001)
def run_goodness_of_fit_test(self, observed_freqs, expected_freqs, alpha=0.01, min_counts=5):
    """
    Checks the validity of observed and expected counts and runs chi-square test for goodness of fit.

    Categories whose observed frequency is below ``min_counts`` are left out of the test,
    together with their expected frequencies.

    :param observed_freqs: observed frequencies
    :type  observed_freqs: pd.Series
    :param expected_freqs: expected frequencies
    :type  expected_freqs: pd.Series
    :param alpha: significance level
    :type  alpha: float
    :param min_counts: minimum number of observations a category needs to take part in the test
    :type  min_counts: int

    :return: split_is_unbiased: False if split is biased and True if split is correct
             p_value: corresponding chi-square p-value
    :rtype:  bool, float

    :raises ValueError: if either input is not a pd.Series, if either input is empty, or if
                        the remaining observed and expected categories differ in number or
                        are fewer than two
    """
    both_are_series = isinstance(observed_freqs, pd.Series) and isinstance(
        expected_freqs, pd.Series)
    if not both_are_series:
        raise ValueError(
            "Observed and expected frequencies should be of type Series.")

    if observed_freqs.empty or expected_freqs.empty:
        raise ValueError(
            "Variant split check was cancelled since expected or observed frequencies are empty."
        )

    # Keep only categories with at least min_counts observations; a category sitting
    # exactly on the threshold is valid ("at least"), hence >= rather than >.
    valid_observed_freqs = observed_freqs[observed_freqs >= min_counts]
    # Restrict the expected frequencies to the categories that survived the filter.
    valid_expected_freqs = expected_freqs.filter(
        items=valid_observed_freqs.keys())

    n_observed = len(valid_observed_freqs)
    n_expected = len(valid_expected_freqs)
    if n_observed != n_expected or n_observed < 2:
        raise ValueError(
            "Variant split check was cancelled since observed or expected frequencies "
            "are less than 2.")

    # Sort both series by label so that the categories line up position by position.
    _, p_value = statx.chi_square(valid_observed_freqs.sort_index(),
                                  valid_expected_freqs.sort_index())
    split_is_unbiased = p_value >= alpha
    return split_is_unbiased, p_value
def do_delta_categorical(df):
    """
    Run the chi-square check of the third column of ``df`` against ``baseline_metric``.

    ``baseline_metric`` is not a parameter; it is taken from the surrounding scope
    (not visible in this block -- presumably the baseline variant's values, TODO confirm).

    :param df: data frame with at least three columns; only the third one is used
    :return: whatever ``feature_check_to_dataframe`` builds from the third column's
             label, its sample size and the chi-square p-value
    """
    value_position = 2  # positional index of the column under test

    # The p-value is the first element of the chi-square result.
    p_value = statx.chi_square(x=df.iloc[:, value_position], y=baseline_metric)[0]
    variant_sample_size = statx.sample_size(df.iloc[:, value_position])

    return feature_check_to_dataframe(
        metric=df.columns[value_position],
        samplesize_variant=variant_sample_size,
        pval=p_value,
    )
def chi_square_test_result_and_statistics(self, variant_column, weights, min_counts=5, alpha=0.05):
    """
    Tests the consistency of variant split with the hypothesized distribution.

    :param variant_column: variant column from the input data frame
    :param weights: dict with variant names as keys, weights as values
                    ({<variant_name>: <weight>, ...})
    :param min_counts: minimum observed frequency every category must reach (should be at least 5), see
                       http://docs.scipy.org/doc/scipy-0.16.1/reference/generated/scipy.stats.chisquare.html
    :param alpha: significance level, 0.05 by default

    :return: True if the split is consistent with the given weights (p-value >= alpha),
             False otherwise; followed by the p-value and the chi-square value
    :rtype:  Boolean, float, float

    :raises ValueError: if an input has no length, if there are fewer than two weights or
                        observations, if any category has fewer than ``min_counts``
                        observations, or if fewer than two categories are observed
    """
    # Both inputs must be sized containers.
    if not hasattr(variant_column, '__len__'):
        raise ValueError(
            "Variant split check was cancelled since input variant column is empty or doesn't exist."
        )
    if not hasattr(weights, '__len__'):
        raise ValueError(
            "Variant split check was cancelled since input weights are empty or doesn't exist."
        )

    too_few_weights = len(weights) <= 1
    if too_few_weights or len(variant_column) <= 1:
        raise ValueError(
            "Variant split check was cancelled since input weights or the number if categories "
            "is less than 2.")

    # Observed frequency of every variant, ignoring missing values.
    variant_column = pd.Series(variant_column).dropna(axis=0)
    observed_freqs = variant_column.value_counts()

    # The test is not conducted when any category has fewer than min_counts observations.
    if (observed_freqs < min_counts).any():
        raise ValueError(
            "Chi-square test is not valid for small expected or observed frequencies."
        )

    # At least two observed categories are needed for the test.
    if len(observed_freqs) < 2:
        raise ValueError(
            "If the number of categories is less than 2 Chi-square test is not applicable."
        )

    # Expected counts: weights of the categories that were actually observed,
    # scaled by the total number of observations.
    total_count = observed_freqs.sum()
    observed_labels = observed_freqs.index.values
    weights = {
        name: weight
        for name, weight in weights.items() if name in observed_labels
    }
    expected_freqs = pd.Series(weights)
    expected_freqs *= total_count

    # Sort both series by label so that the categories line up position by position.
    chi_square_val, p_val = statx.chi_square(observed_freqs.sort_index(),
                                             expected_freqs.sort_index())
    is_consistent = p_val >= alpha
    return is_consistent, p_val, chi_square_val