def test_historical_regular_holidays_fall_into_precomputed_holidays(self): from pandas import DatetimeIndex precomputed_holidays = DatetimeIndex(self.calendar.adhoc_holidays) # precomputed holidays won't include weekends (saturday, sunday) self.assertTrue(all(d.weekday() < 5 for d in precomputed_holidays)) generated_holidays = self.calendar.regular_holidays.holidays( precomputed_holidays.min(), precomputed_holidays.max(), return_name=True) # generated holidays include weekends self.assertFalse(all(d.weekday() < 5 for d in generated_holidays.index)) # filter non weekend generated holidays non_weekend_mask = DatetimeIndex( [d for d in generated_holidays.index if d.weekday() < 5]) non_weekend_generated_holidays = generated_holidays[non_weekend_mask] # generated holidays should generally fall into one of the precomputed holidays # except the future holidays that are not precomputed yet isin = non_weekend_generated_holidays.index.isin(precomputed_holidays) missing = non_weekend_generated_holidays[~isin] self.assertTrue(all(isin), "missing holidays = \n%s" % missing)
def read_eng_file(private_file: str, date_index: bool = True, compute_date: bool = True,
                  allowed_flags: Sequence[int] = (0, ), dates: pd.DatetimeIndex = None) -> pd.DataFrame:
    """Read a .eof.csv (engineering output file, comma-separated value format) or private netCDF file

    Parameters
    ----------
    private_file:
        the path to the private netCDF file (``.nc``/``.nc4``) or the .eof.csv file

    date_index:
        if `True` then the returned dataframe is indexed by date.

    compute_date:
        if `True` and `date_index` is `False`, then a 'date' column is added to the
        dataframe containing the observation datetimes. Has no effect if reading a
        netCDF file.

    allowed_flags:
        which quality flags are kept in the dataframe. If `None` or the string `'all'`
        every row whose flag is greater than -99 is kept.

    dates:
        a date array indicating the date range of data to retain. Data between the min
        and max of this array (inclusive) will be kept. If this is `None`, no date
        limiting is done. If this is given, `compute_date` is considered `True`
        regardless of its actual value.

    Returns
    -------
    pd.DataFrame:
        dataframe with the rows of the input file that pass the flag filter and,
        when `dates` is given, the date filter.
    """
    limit_dates = dates is not None

    if private_file.endswith(('.nc', '.nc4')):
        df = _read_private_nc(private_file, date_index=date_index)
    else:
        # Date limiting needs the observation datetimes, so force them on when
        # `dates` is given (as the docstring promises).
        df = _read_eof_csv(private_file, date_index=date_index,
                           compute_date=compute_date or limit_dates)

    # Only compare against 'all' when a string was actually passed; comparing
    # an array-like with a string is elementwise and has no single truth value.
    keep_all_flags = allowed_flags is None or (
        isinstance(allowed_flags, str) and allowed_flags == 'all'
    )
    if keep_all_flags:
        xx = df['flag'] > -99
    else:
        xx = df['flag'].isin(allowed_flags)

    if limit_dates:
        # Dates live in the index when `date_index` is set, otherwise in the
        # 'date' column.
        # NOTE(review): assumes the netCDF reader also provides a 'date' column
        # when `date_index` is False - confirm against `_read_private_nc`.
        df_dates = df.index if date_index else df['date']
        in_range = (df_dates >= dates.min()) & (df_dates <= dates.max())
        xx &= in_range

    return df[xx]
def to_tree(cls, node: pd.DatetimeIndex, ctx):
    """Serialize a DatetimeIndex into a plain dictionary.

    When pandas can infer a frequency for ``node`` only that frequency is
    stored under ``"freq"``; otherwise the raw index values, cast to int64,
    are stored under ``"values"``. The first and last elements and the
    minimum and maximum are always added as ``"start"``, ``"end"``,
    ``"min"`` and ``"max"``.
    """
    freq = node.inferred_freq
    if freq is None:
        # No regular spacing could be inferred: keep every value.
        leading = {"values": node.values.astype(np.int64)}
    else:
        leading = {"freq": freq}

    return {
        **leading,
        # First/last positions, which may differ from min/max when the
        # index is not sorted.
        "start": node[0],
        "end": node[-1],
        "min": node.min(),
        "max": node.max(),
    }
def to_yaml_tree(self, obj: pd.DatetimeIndex, tag: str, ctx) -> dict:
    """Convert a DatetimeIndex to a python dict.

    The dict holds ``"freq"`` when pandas can infer a frequency for ``obj``,
    or ``"values"`` (the raw index values cast to int64) when it cannot.
    ``"start"``, ``"end"``, ``"min"`` and ``"max"`` are always included.
    ``tag`` and ``ctx`` are accepted but not used here.
    """
    inferred = obj.inferred_freq
    has_freq = inferred is not None

    # A regular index is described by its frequency alone; an irregular one
    # needs every value written out.
    key = "freq" if has_freq else "values"
    payload = inferred if has_freq else obj.values.astype(np.int64)

    tree = {key: payload}
    tree.update(start=obj[0], end=obj[-1], min=obj.min(), max=obj.max())
    return tree
def create_cal(trade_dates: pd.DatetimeIndex) -> pd.DataFrame:
    """Build a trading calendar table.

    Args:
        trade_dates (pd.DatetimeIndex): the trading days. Any list-like that
            ``pd.DatetimeIndex`` accepts is also allowed.

    Returns:
        pd.DataFrame: a frame indexed by every calendar day from the earliest
            to the latest trading day (inclusive), with a boolean
            ``is_tradeday`` column that is True on the trading days. An empty
            input yields an empty frame with the same column.
    """
    trade_dates = pd.DatetimeIndex(trade_dates)
    has_days = len(trade_dates) > 0

    if has_days:
        all_days = pd.date_range(trade_dates.min(), trade_dates.max())
    else:
        # min()/max() of an empty index are NaT, which date_range rejects.
        all_days = pd.DatetimeIndex([])

    df = pd.DataFrame(index=all_days)
    df['is_tradeday'] = False
    if has_days:
        df.loc[trade_dates, 'is_tradeday'] = True
    return df