def get_previous_business_day(super_set: pd.DatetimeIndex, sub_set: pd.DatetimeIndex) -> pd.DatetimeIndex:
    """Snap every date of ``sub_set`` back onto a date contained in ``super_set``.

    Dates of ``sub_set`` that already occur in ``super_set`` are kept. Every
    other date is moved back one business day at a time (``BDay(1)``) until
    it coincides with an entry of ``super_set``.

    Args:
        super_set: The dates that are allowed in the result.
        sub_set: The dates to align with ``super_set``.

    Returns:
        The union of the kept and the shifted dates; every element is a
        member of ``super_set``.

    Raises:
        ValueError: If a date still to be shifted lies before the earliest
            date of ``super_set`` (or ``super_set`` is empty). Shifting only
            ever moves dates further back, so such a date could never match
            and the loop would not terminate normally.
    """
    one_business_day = pd.tseries.offsets.BDay(1)
    earliest_allowed = super_set.min() if len(super_set) > 0 else None

    diff = sub_set.difference(super_set)
    while len(diff) > 0:
        # Guard against non-termination: once a pending date is earlier than
        # everything in super_set, stepping backwards can never reach a match.
        if earliest_allowed is None or diff.min() < earliest_allowed:
            raise ValueError(
                "sub_set contains dates with no matching date in super_set "
                "on or before them"
            )
        new_dates = diff - one_business_day
        sub_set = new_dates.union(sub_set.intersection(super_set))
        diff = sub_set.difference(super_set)
    return sub_set
class SetDisjoint:
    """Timing fixture for ``DatetimeIndex.difference`` on two disjoint indexes.

    NOTE(review): the ``setup`` / ``time_*`` naming looks like the asv
    benchmark convention -- confirm against the benchmark runner in use.
    """

    def setup(self):
        """Create the two indexes used by the timed method.

        The left index is built from the integers ``0 .. 10**5 - 1`` and the
        right index from the following 20000 integers, so the two share no
        value.
        """
        left_size = 10**5
        right_stop = left_size + 20000
        self.datetime_left = DatetimeIndex(range(left_size))
        self.datetime_right = DatetimeIndex(range(left_size, right_stop))

    def time_datetime_difference_disjoint(self):
        """Run ``left.difference(right)``; the result is discarded."""
        left, right = self.datetime_left, self.datetime_right
        left.difference(right)
def test_datetimeindex_diff(self): dti1 = DatetimeIndex(freq='Q-JAN', start=datetime(1997, 12, 31), periods=100) dti2 = DatetimeIndex(freq='Q-JAN', start=datetime(1997, 12, 31), periods=98) assert len(dti1.difference(dti2)) == 2
def apply_to_issue_times(
        self,
        issue_times: pd.DatetimeIndex) -> Iterable[IssueTimesPartition]:
    """Yield one train/test partition of ``issue_times`` per fold.

    ``issue_times`` is cut by position into ``num_folds * num_repeats``
    contiguous slices of near-equal length. For fold ``i`` the test set is
    the concatenation of slices ``i, i + num_folds, i + 2 * num_folds, ...``
    and the train set is ``issue_times.difference(test_set)``.

    Args:
        issue_times: The issue times to split.

    Yields:
        ``IssueTimesPartition(train_set, test_set)`` for each of the
        ``self.num_folds`` folds, in fold order.
    """
    n_chunks = self.num_folds * self.num_repeats
    # Kept as a float so the rounded boundaries spread any remainder evenly
    # over the slices instead of piling it onto the last one.
    chunk_len = len(issue_times) / n_chunks
    chunks = [
        issue_times[round(k * chunk_len):round((k + 1) * chunk_len)]
        for k in range(n_chunks)
    ]

    for fold in range(self.num_folds):
        # Every num_folds-th slice, starting at this fold's offset.
        held_out = chunks[fold::self.num_folds]
        test_set = pd.DatetimeIndex(np.concatenate(held_out))
        train_set = issue_times.difference(test_set)
        yield IssueTimesPartition(train_set, test_set)
def missing_dataset_days(
        self,
        desireddays: pd.DatetimeIndex) -> pd.DatetimeIndex:
    """Return the entries of ``desireddays`` that are absent from the dataset.

    Opens the file at ``self.datapath`` read-only, decodes the ``time``
    entry found under ``self.operation`` with its own ``units`` and
    ``calendar`` attributes, and compares the decoded dates with
    ``desireddays``.

    Args:
        desireddays: The dates that should be present.

    Returns:
        ``desireddays.difference(presentdays)``; when reading the time
        values raises ``IndexError`` nothing counts as present.
    """
    with nc.Dataset(self.datapath, mode='r') as ds:
        try:
            time_var = ds[self.operation]['time']
            decoded = nc.num2date(
                time_var[:],
                units=time_var.units,
                calendar=time_var.calendar)
            presentdays = pd.DatetimeIndex(decoded)
        except IndexError:
            # Per the original author: the time dimension is completely
            # empty in this case, so no day is present yet.
            presentdays = pd.DatetimeIndex([])
    return desireddays.difference(presentdays)
def test_datetimeindex_diff(self): dti1 = DatetimeIndex(freq='Q-JAN', start=datetime(1997, 12, 31), periods=100) dti2 = DatetimeIndex(freq='Q-JAN', start=datetime(1997, 12, 31), periods=98) self.assertEqual(len(dti1.difference(dti2)), 2)