def test_nearest_unequal_elements(self, tz):
    dts = pd.to_datetime(
        ['2014-01-01', '2014-01-05', '2014-01-06', '2014-01-09'],
    ).tz_localize(tz)

    def t(s):
        return None if s is None else pd.Timestamp(s, tz=tz)

    for dt, before, after in (('2013-12-30', None, '2014-01-01'),
                              ('2013-12-31', None, '2014-01-01'),
                              ('2014-01-01', None, '2014-01-05'),
                              ('2014-01-02', '2014-01-01', '2014-01-05'),
                              ('2014-01-03', '2014-01-01', '2014-01-05'),
                              ('2014-01-04', '2014-01-01', '2014-01-05'),
                              ('2014-01-05', '2014-01-01', '2014-01-06'),
                              ('2014-01-06', '2014-01-05', '2014-01-09'),
                              ('2014-01-07', '2014-01-06', '2014-01-09'),
                              ('2014-01-08', '2014-01-06', '2014-01-09'),
                              ('2014-01-09', '2014-01-06', None),
                              ('2014-01-10', '2014-01-09', None),
                              ('2014-01-11', '2014-01-09', None)):
        computed = nearest_unequal_elements(dts, t(dt))
        expected = (t(before), t(after))
        self.assertEqual(computed, expected)
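# For orientation, the contract exercised above, shown tz-naive (an
# illustrative REPL session, not part of the test suite):
#
#   >>> dts = pd.to_datetime(
#   ...     ['2014-01-01', '2014-01-05', '2014-01-06', '2014-01-09'])
#   >>> nearest_unequal_elements(dts, pd.Timestamp('2014-01-05'))
#   (Timestamp('2014-01-01 00:00:00'), Timestamp('2014-01-06 00:00:00'))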
def test_nearest_unequal_bad_input(self):
    with self.assertRaises(ValueError) as e:
        nearest_unequal_elements(
            pd.to_datetime(['2014', '2014']),
            pd.Timestamp('2014'),
        )
    self.assertEqual(str(e.exception), 'dts must be unique')

    with self.assertRaises(ValueError) as e:
        nearest_unequal_elements(
            pd.to_datetime(['2014', '2013']),
            pd.Timestamp('2014'),
        )
    self.assertEqual(
        str(e.exception),
        'dts must be sorted in increasing order',
    )
def test_nearest_unequal_elements_short_dts(self, tz):
    # Length 1.
    dts = pd.to_datetime(['2014-01-01']).tz_localize(tz)

    def t(s):
        return None if s is None else pd.Timestamp(s, tz=tz)

    for dt, before, after in (('2013-12-31', None, '2014-01-01'),
                              ('2014-01-01', None, None),
                              ('2014-01-02', '2014-01-01', None)):
        computed = nearest_unequal_elements(dts, t(dt))
        expected = (t(before), t(after))
        self.assertEqual(computed, expected)

    # Length 0.
    dts = pd.to_datetime([]).tz_localize(tz)
    for dt, before, after in (('2013-12-31', None, None),
                              ('2014-01-01', None, None),
                              ('2014-01-02', None, None)):
        computed = nearest_unequal_elements(dts, t(dt))
        expected = (t(before), t(after))
        self.assertEqual(computed, expected)
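# A minimal reference sketch consistent with the tests above. This
# illustrates the expected semantics of nearest_unequal_elements; it is
# not necessarily the library's actual implementation.
def _nearest_unequal_elements_sketch(dts, dt):
    """Return (last element < dt, first element > dt), None for either bound."""
    if not dts.is_unique:
        raise ValueError('dts must be unique')
    if not dts.is_monotonic_increasing:
        raise ValueError('dts must be sorted in increasing order')
    if not len(dts):
        return None, None

    # Index of the first element >= dt.
    ix = dts.searchsorted(dt)

    before_ix = ix - 1  # Last element strictly less than dt.
    # First element strictly greater than dt: skip over dt itself if present.
    after_ix = ix + 1 if ix < len(dts) and dts[ix] == dt else ix

    before = dts[before_ix] if before_ix >= 0 else None
    after = dts[after_ix] if after_ix < len(dts) else None
    return before, after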
def compute_extra_rows(self,
                       all_dates,
                       start_date,
                       end_date,
                       min_extra_rows):
    """
    Ensure that min_extra_rows pushes us back to a computation date.

    Parameters
    ----------
    all_dates : pd.DatetimeIndex
        The trading sessions against which ``self`` will be computed.
    start_date : pd.Timestamp
        The first date for which final output is requested.
    end_date : pd.Timestamp
        The last date for which final output is requested.
    min_extra_rows : int
        The minimum number of extra rows required of ``self``, as
        determined by other terms that depend on ``self``.

    Returns
    -------
    extra_rows : int
        The number of extra rows to compute.  This will be the minimum
        number of rows required to make our computed start_date fall on a
        recomputation date.
    """
    try:
        current_start_pos = all_dates.get_loc(start_date) - min_extra_rows
        if current_start_pos < 0:
            raise NoFurtherDataError(
                initial_message="Insufficient data to compute Pipeline:",
                first_date=all_dates[0],
                lookback_start=start_date,
                lookback_length=min_extra_rows,
            )
    except KeyError:
        before, after = nearest_unequal_elements(all_dates, start_date)
        raise ValueError(
            "Pipeline start_date {start_date} is not in calendar.\n"
            "Latest date before start_date is {before}.\n"
            "Earliest date after start_date is {after}.".format(
                start_date=start_date,
                before=before,
                after=after,
            )
        )

    # Our possible target dates are all the dates on or before the current
    # starting position.
    # TODO: Consider bounding this below by self.window_length
    candidates = all_dates[:current_start_pos + 1]

    # Choose the latest date in the candidates that is the start of a new
    # period at our frequency.
    choices = select_sampling_indices(candidates, self._frequency)

    # If we have choices, the last choice is the first date of the
    # period containing current_start_date.  Choose it.
    new_start_date = candidates[choices[-1]]

    # Add the difference between the new and old start dates to get the
    # number of rows for the new start_date.
    new_start_pos = all_dates.get_loc(new_start_date)
    assert new_start_pos <= current_start_pos, \
        "Computed negative extra rows!"

    return min_extra_rows + (current_start_pos - new_start_pos)
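# Worked example of the arithmetic above, using a hypothetical calendar and
# weekly (Monday) sampling; all values here are illustrative only:
#
#   all_dates      = pd.date_range('2014-01-06', periods=10, freq='B')
#   start_date     = all_dates[5]           # Monday 2014-01-13
#   min_extra_rows = 2
#
#   current_start_pos = 5 - 2 = 3           # Thursday 2014-01-09
#   candidates        = all_dates[:4]       # 2014-01-06 through 2014-01-09
#
# The only Monday among the candidates is 2014-01-06, at position 0, so
# new_start_pos = 0 and we return 2 + (3 - 0) = 5 extra rows: just enough
# to push the computed start date back onto a recomputation date.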