def test_check_int(self):
    """
    Check NumericStatsMixin.is_int against integer and non-integer inputs.

    Values in ``true_asserts`` must be reported as integers, values in
    ``false_asserts`` must not. Every value runs inside its own subTest so
    that a failure names the offending input (a bare ``assertTrue`` only
    reports "False is not true") and the remaining values are still
    checked.

    :return: None
    """
    true_asserts = [
        1, 1345, -13, 0, -0,  # numeric values
        "1",  # strings
    ]
    for assert_val in true_asserts:
        # repr() keeps the string "1" distinguishable from the int 1
        with self.subTest(expected=True, value=repr(assert_val)):
            self.assertTrue(NumericStatsMixin.is_int(assert_val))

    false_asserts = [
        1.3,  # float
        float("nan"), np.nan,  # nan value
        "nan", "1a", "abc", "", "1.3",  # strings
    ]
    for assert_val in false_asserts:
        with self.subTest(expected=False, value=repr(assert_val)):
            self.assertFalse(NumericStatsMixin.is_int(assert_val))
def test_base(self):
    """
    Check how the NumericStatsMixin constructor validates ``options``.

    A non-NumericalOptions value must raise ValueError with the expected
    message; omitting options or passing a NumericalOptions instance must
    construct without raising.

    :return: None
    """
    # validate requires NumericalOptions
    with self.assertRaisesRegex(
            ValueError,
            "NumericalStatsMixin parameter 'options' "
            "must be of type NumericalOptions."):
        # Only the raised exception matters; the instance is not kept.
        NumericStatsMixin(options='bad options')

    try:
        # validate doesn't fail
        NumericStatsMixin()
        NumericStatsMixin(NumericalOptions())
    except Exception as e:
        # Report an unexpected exception as a test failure.
        self.fail(e)
def test_get_percentile_median(self):
    """
    Check that the 50th percentile of a hand-built histogram is 10.

    The stored histogram has five bins spanning 0.0 to 20.0 with the
    symmetric counts 1, 2, 0, 2, 1. The percentile 50 is requested twice
    and both results are expected to equal 10.

    :return: None
    """
    column = TestColumn()

    # Fixture histogram assigned directly onto the profiler instance.
    counts = np.array([1, 2, 0, 2, 1])
    edges = np.array([0.0, 4.0, 8.0, 12.0, 16.0, 20.0])
    column._stored_histogram = {
        "histogram": {
            "bin_counts": counts,
            "bin_edges": edges,
        }
    }

    result = NumericStatsMixin._get_percentile(
        column, percentiles=[50, 50])
    self.assertListEqual([10, 10], result)
def test_report(self):
    """
    Check which keys ``report`` exposes when statistics are disabled.

    max, min, histogram_and_quantiles and variance are switched off in the
    options. With ``remove_disabled_flag=True`` the matching report keys
    must be absent; with the default arguments they must be present.

    :return: None
    """
    options = NumericalOptions()
    for stat_name in ("max", "min", "histogram_and_quantiles", "variance"):
        getattr(options, stat_name).is_enabled = False

    profiler = NumericStatsMixin(options=options)
    # Attributes set by hand before calling report() on the bare mixin.
    profiler.match_count = 0
    profiler.times = defaultdict(float)

    disabled_keys = ["max", "min", "variance", "histogram", "quantiles"]

    # Disabled statistics are dropped when the flag is set.
    pruned_keys = list(profiler.report(remove_disabled_flag=True).keys())
    for key in disabled_keys:
        self.assertNotIn(key, pruned_keys)

    # Default `remove_disabled_flag` value: the keys stay in the report.
    default_keys = list(profiler.report().keys())
    for key in disabled_keys:
        self.assertIn(key, default_keys)
def test_check_float(self):
    """
    Check NumericStatsMixin.is_float against float and non-float inputs.

    Values in ``true_asserts`` must be reported as floats, values in
    ``false_asserts`` must not. Every value runs inside its own subTest so
    that a failure names the offending input (a bare ``assertTrue`` only
    reports "False is not true") and the remaining values are still
    checked.

    :return: None
    """
    true_asserts = [
        1.3, 1.345, -1.3, 0.03, 0.0, -0.0, 1,  # numeric values
        float("nan"), np.nan,  # nan values
        "1.3", "nan",  # strings
    ]
    for assert_val in true_asserts:
        # repr() keeps the string "1.3" distinguishable from the float 1.3
        with self.subTest(expected=True, value=repr(assert_val)):
            self.assertTrue(NumericStatsMixin.is_float(assert_val))

    false_asserts = ["1.3a", "abc", "", "1.23.45"]
    for assert_val in false_asserts:
        with self.subTest(expected=False, value=repr(assert_val)):
            self.assertFalse(NumericStatsMixin.is_float(assert_val))
def __init__(self):
    """
    Run NumericStatsMixin's initializer, then set ``times`` to an empty
    float-valued defaultdict and ``match_count`` to zero.
    """
    NumericStatsMixin.__init__(self)
    self.times = defaultdict(float)
    self.match_count = 0