def testFindFrequency(self):
    """Daily and weekly panels are reported as 'D' and 'W' respectively."""
    geo_ids = [1, 2, 3, 4]
    # Same date window for both cases; only the sampling frequency changes.
    for freq_code in ('D', 'W'):
        timestamps = list(
            pd.date_range(start='2020-01-01', end='2020-02-01', freq=freq_code))
        # Each geo carries the full run of timestamps, one geo after another.
        panel = pd.DataFrame({
            'date': timestamps * len(geo_ids),
            'geo': [geo for geo in geo_ids for _ in timestamps],
        })
        panel = panel.set_index(['geo', 'date'])
        self.assertEqual(util.infer_frequency(panel, 'date', 'geo'), freq_code)
def testUnknownFrequency(self):
    """Observations spaced three days apart raise a ValueError."""
    geo_ids = [1, 2]
    timestamps = list(
        pd.to_datetime(['2020-10-10', '2020-10-13', '2020-10-16']))
    panel = pd.DataFrame({
        'date': timestamps * len(geo_ids),
        'geo': [geo for geo in geo_ids for _ in timestamps],
    })
    panel = panel.set_index(['geo', 'date'])

    with self.assertRaises(ValueError) as raised:
        util.infer_frequency(panel, 'date', 'geo')

    expected_message = 'Frequency could not be identified. Got 3 days.'
    self.assertEqual(str(raised.exception), expected_message)
def testFindFrequencyDataNotSorted(self):
    """A daily panel whose rows have been shuffled is still reported as 'D'."""
    dates = list(
        pd.date_range(start='2020-01-01', end='2020-02-01', freq='D'))
    geos = [1, 2, 3, 4]
    df = pd.DataFrame({
        'date': dates * len(geos),
        'geo': sorted(geos * len(dates))
    })
    # Permute the order of the rows, so that the dataset is not sorted by date.
    # The seed is fixed so the same permutation is exercised on every run and
    # any failure can be reproduced.
    df = df.sample(frac=1, replace=False, random_state=0)
    df.set_index(['geo', 'date'], inplace=True)
    frequency = util.infer_frequency(df, 'date', 'geo')
    self.assertEqual(frequency, 'D')
def testDifferentFrequencies(self):
    """Mixing a daily geo with a weekly geo raises a ValueError."""
    window = {'start': '2020-01-01', 'end': '2020-02-01'}
    daily = list(pd.date_range(freq='D', **window))
    weekly = list(pd.date_range(freq='W', **window))

    # Geo 1 holds the daily series, geo 2 the weekly one.
    geo_column = [1 for _ in daily] + [2 for _ in weekly]
    panel = pd.DataFrame({'date': daily + weekly, 'geo': geo_column})
    panel = panel.set_index(['geo', 'date'])

    with self.assertRaises(ValueError) as raised:
        util.infer_frequency(panel, 'date', 'geo')

    expected_message = (
        'The provided time series seem to have irregular frequencies.')
    self.assertEqual(str(raised.exception), expected_message)
def testInsufficientData(self):
    """A single observation per geo raises a ValueError."""
    geo_ids = [1, 2]
    # Start and end coincide, so the range holds exactly one date.
    timestamps = list(
        pd.date_range(start='2020-01-01', end='2020-01-01', freq='D'))
    panel = pd.DataFrame({
        'date': timestamps * len(geo_ids),
        'geo': [geo for geo in geo_ids for _ in timestamps],
    })
    panel = panel.set_index(['geo', 'date'])

    with self.assertRaises(ValueError) as raised:
        util.infer_frequency(panel, 'date', 'geo')

    expected_message = (
        'At least one series with more than one observation must be provided.')
    self.assertEqual(str(raised.exception), expected_message)