Example #1
0
    def testFindFrequency(self):
        """Checks that daily and weekly panels are reported as 'D' and 'W'."""
        geos = [1, 2, 3, 4]
        # The same one-month window is sampled at each frequency; the alias
        # used to build the calendar is also the value expected back.
        for alias in ('D', 'W'):
            timestamps = list(
                pd.date_range(start='2020-01-01', end='2020-02-01', freq=alias))
            # One full copy of the calendar per geo, geo labels grouped together.
            panel = pd.DataFrame({
                'date': timestamps * len(geos),
                'geo': [geo for geo in geos for _ in timestamps],
            })
            panel = panel.set_index(['geo', 'date'])
            self.assertEqual(util.infer_frequency(panel, 'date', 'geo'), alias)
Example #2
0
 def testUnknownFrequency(self):
     """Checks that dates spaced three days apart raise a ValueError."""
     geos = [1, 2]
     timestamps = list(
         pd.to_datetime(['2020-10-10', '2020-10-13', '2020-10-16']))
     # Every geo gets the same three dates, indexed by (geo, date).
     panel = pd.DataFrame({
         'date': timestamps * len(geos),
         'geo': [geo for geo in geos for _ in timestamps],
     }).set_index(['geo', 'date'])
     expected_message = 'Frequency could not be identified. Got 3 days.'
     with self.assertRaises(ValueError) as raised:
         util.infer_frequency(panel, 'date', 'geo')
     self.assertEqual(str(raised.exception), expected_message)
Example #3
0
 def testFindFrequencyDataNotSorted(self):
     """Checks that a daily panel is still reported as 'D' when rows are shuffled."""
     dates = list(
         pd.date_range(start='2020-01-01', end='2020-02-01', freq='D'))
     geos = [1, 2, 3, 4]
     df = pd.DataFrame({
         'date': dates * len(geos),
         'geo': sorted(geos * len(dates))
     })
     # Permute the order of the rows, so that the dataset is not sorted by date.
     # The shuffle is seeded so that every run exercises the same permutation
     # and any failure can be reproduced.
     df = df.sample(frac=1, replace=False, random_state=0)
     df.set_index(['geo', 'date'], inplace=True)
     frequency = util.infer_frequency(df, 'date', 'geo')
     self.assertEqual(frequency, 'D')
Example #4
0
 def testDifferentFrequencies(self):
     """Checks that mixing a daily geo with a weekly geo raises a ValueError."""
     window = {'start': '2020-01-01', 'end': '2020-02-01'}
     daily = list(pd.date_range(freq='D', **window))
     weekly = list(pd.date_range(freq='W', **window))
     # Geo 1 carries the daily dates, geo 2 the weekly ones.
     panel = pd.DataFrame({
         'date': daily + weekly,
         'geo': [1] * len(daily) + [2] * len(weekly),
     })
     panel = panel.set_index(['geo', 'date'])
     expected_message = (
         'The provided time series seem to have irregular frequencies.')
     with self.assertRaises(ValueError) as raised:
         util.infer_frequency(panel, 'date', 'geo')
     self.assertEqual(str(raised.exception), expected_message)
Example #5
0
 def testInsufficientData(self):
     """Checks that one date per geo is rejected with a ValueError."""
     geos = [1, 2]
     # start == end, so the range holds only 2020-01-01.
     single_day = list(
         pd.date_range(start='2020-01-01', end='2020-01-01', freq='D'))
     panel = pd.DataFrame({
         'date': single_day * len(geos),
         'geo': [geo for geo in geos for _ in single_day],
     }).set_index(['geo', 'date'])
     expected_message = (
         'At least one series with more than one observation must be provided.')
     with self.assertRaises(ValueError) as raised:
         util.infer_frequency(panel, 'date', 'geo')
     self.assertEqual(str(raised.exception), expected_message)