def test_pandas_inferable_temporal_frequencies(case):
    """Check the frequency returned by ``infer_frequency`` with ``debug=False``.

    ``case`` is a mapping providing the observed series under ``"dates"``
    and the frequency the call is expected to return under
    ``"expected_infer_freq"``.
    """
    observed = case["dates"]
    wanted_freq = case["expected_infer_freq"]

    # With debug disabled the call yields a single value: the frequency.
    got_freq = infer_frequency(observed_ts=observed, debug=False)

    assert got_freq == wanted_freq
def test_error_messages(case):
    """Check the debug object produced when no frequency is inferred.

    ``case`` is a mapping providing the observed series under ``"dates"``
    and the expected debug object under ``"expected_debug_obj"``.
    """
    observed = case["dates"]
    wanted_debug = case["expected_debug_obj"]

    # In debug mode the call yields a (frequency, debug object) pair.
    result = infer_frequency(observed_ts=observed, debug=True)
    got_freq, got_debug = result

    assert got_freq is None
    assert got_debug == wanted_debug
def test_inferable_temporal_frequencies_cases(case):
    """Compare the debug object from ``infer_frequency`` against a case.

    ``case`` is a mapping providing the observed series under ``"dates"``
    and the expected debug object under ``"expected_debug_obj"``.

    NOTE(review): despite the "inferable" name, this test asserts that the
    returned frequency is ``None`` -- confirm that is intended for these
    cases.
    """
    observed = case["dates"]
    wanted_debug = case["expected_debug_obj"]

    # In debug mode the call yields a (frequency, debug object) pair.
    result = infer_frequency(observed_ts=observed, debug=True)
    got_freq, got_debug = result

    assert got_freq is None
    assert got_debug == wanted_debug
def test_inferable_temporal_frequencies_duplicates(freq, error_range):
    """Check that repeated timestamps are reported as duplicates.

    A clean series of 1000 timestamps is generated at ``freq``; then
    ``error_range`` extra copies of the middle timestamp are inserted
    directly after it. ``infer_frequency`` in debug mode is expected to
    return ``None`` as the frequency together with a debug object that lists
    exactly one duplicate entry and no missing, extra, or NaN values.
    """
    # Drop the first generated timestamp, since it probably doesn't agree
    # with freq.
    full_index = pd.date_range("2005-01-01", periods=1001, freq=freq)
    clean = full_index[1:].to_series().reset_index(drop=True)

    # Pick the middle element as the one to duplicate.
    mid = len(clean) // 2
    repeated_ts = clean.loc[mid]

    # Splice the repeated timestamps in right after the middle element.
    head = clean.iloc[:mid + 1]
    repeats = pd.Series(np.full((error_range, ), repeated_ts))
    tail = clean.iloc[mid + 1:]
    observed = pd.concat([head, repeats, tail]).reset_index(drop=True)

    duplicate_entry = {
        "dt": repeated_ts.isoformat(),
        "idx": mid + 1,
        "range": error_range,
    }
    wanted_debug = {
        "actual_range_start": clean.iloc[0].isoformat(),
        "actual_range_end": clean.iloc[-1].isoformat(),
        "message": None,
        "estimated_freq": freq,
        "estimated_range_start": observed.iloc[0].isoformat(),
        "estimated_range_end": observed.iloc[-1].isoformat(),
        "duplicate_values": [duplicate_entry],
        "missing_values": [],
        "extra_values": [],
        "nan_values": [],
    }

    # In debug mode the call yields a (frequency, debug object) pair.
    got_freq, got_debug = infer_frequency(observed_ts=observed, debug=True)

    assert got_freq is None
    assert got_debug == wanted_debug