def test_historical_regular_holidays_fall_into_precomputed_holidays(self):
    """Check rule-generated weekday holidays against the ad-hoc holiday list.

    Every holiday produced by ``self.calendar.regular_holidays`` within the
    span of ``self.calendar.adhoc_holidays`` that lands on a weekday must also
    be present in the ad-hoc (precomputed) list.
    """
    from pandas import DatetimeIndex

    def falls_on_weekday(day):
        # Monday is 0 ... Friday is 4; Saturday/Sunday are 5 and 6.
        return day.weekday() < 5

    adhoc = DatetimeIndex(self.calendar.adhoc_holidays)

    # The precomputed list is expected to contain no Saturday/Sunday entries.
    self.assertTrue(all(falls_on_weekday(day) for day in adhoc))

    rule_based = self.calendar.regular_holidays.holidays(
        adhoc.min(),
        adhoc.max(),
        return_name=True,
    )

    # The rule-based holidays, by contrast, are expected to hit weekends too.
    self.assertFalse(all(falls_on_weekday(day) for day in rule_based.index))

    # Keep only the rule-based holidays that land on a weekday.
    weekday_labels = DatetimeIndex(
        [day for day in rule_based.index if falls_on_weekday(day)]
    )
    weekday_holidays = rule_based[weekday_labels]

    # Each remaining holiday should appear in the precomputed list; holidays
    # in the future that have not been precomputed yet would show up here.
    isin = weekday_holidays.index.isin(adhoc)
    missing = weekday_holidays[~isin]
    self.assertTrue(all(isin), "missing holidays = \n%s" % missing)
def read_eng_file(private_file: str, date_index: bool = True, compute_date: bool = True,
                  allowed_flags: Sequence[int] = (0, ), dates: pd.DatetimeIndex = None) -> pd.DataFrame:
    """Read a .eof.csv (engineering output file, comma-separated value format) file

    Parameters
    ----------
    private_file:
        the path to the private netCDF file (``.nc`` / ``.nc4``) or the .eof.csv file.
        Any path not ending in ``.nc`` or ``.nc4`` is read as an .eof.csv file.

    date_index:
        if `True` then the returned dataframe is indexed by date.

    compute_date:
        if `True` and `date_index` is `False`, then a 'date' column is added to the
        dataframe containing the observation datetimes. Has no effect if reading a
        netCDF file.

    allowed_flags:
        which quality flags are kept in the dataframe. If `None` or the string `'all'`
        every row whose flag is greater than -99 is kept (NOTE(review): -99 and below
        are presumably fill values - confirm).

    dates:
        a date array indicating the date range of data to retain. Data between the min
        and max of this array (inclusive) will be kept. If this is `None`, no date
        limiting is done. If this is given, `compute_date` is considered `True`
        regardless of its actual value.

    Returns
    -------
    pd.DataFrame:
        dataframe with the rows of the input file that pass the flag filter and, if
        `dates` is given, the date-range filter.
    """
    # The date filter below needs the observation datetimes, so honour the documented
    # contract that passing `dates` overrides `compute_date`.
    if dates is not None:
        compute_date = True

    if private_file.endswith(('.nc', '.nc4')):
        df = _read_private_nc(private_file, date_index=date_index)
    else:
        df = _read_eof_csv(private_file, date_index=date_index, compute_date=compute_date)

    # The isinstance check keeps array-like `allowed_flags` from being compared
    # element-wise against the string 'all'.
    keep_all_flags = allowed_flags is None or (
        isinstance(allowed_flags, str) and allowed_flags == 'all'
    )
    if keep_all_flags:
        xx = df['flag'] > -99
    else:
        xx = df['flag'].isin(allowed_flags)

    if dates is not None:
        # The datetimes live in the index when `date_index` is set, otherwise in
        # the 'date' column.
        df_dates = df.index if date_index else df['date']
        in_range = (df_dates >= dates.min()) & (df_dates <= dates.max())
        xx = xx & in_range

    return df[xx]
def to_tree(cls, node: pd.DatetimeIndex, ctx):
    """Serialize DatetimeIndex to tree.

    The tree holds either the inferred frequency string under ``"freq"`` (when
    pandas can infer one) or the int64 form of the underlying values under
    ``"values"``, followed by the first/last element (``"start"``/``"end"``)
    and the smallest/largest element (``"min"``/``"max"``).
    """
    frequency = node.inferred_freq

    # A regular index is fully described by its frequency plus its bounds;
    # an irregular one needs every value written out.
    if frequency is None:
        tree = {"values": node.values.astype(np.int64)}
    else:
        tree = {"freq": frequency}

    tree.update(
        start=node[0],
        end=node[-1],
        min=node.min(),
        max=node.max(),
    )
    return tree
def to_yaml_tree(self, obj: pd.DatetimeIndex, tag: str, ctx) -> dict:
    """Convert to python dict.

    Returns a dict with ``"freq"`` (the inferred frequency string) when pandas
    can infer one, otherwise ``"values"`` (the underlying values cast to
    int64), followed by ``"start"``/``"end"`` (first and last element) and
    ``"min"``/``"max"``.
    """
    inferred = obj.inferred_freq

    # Without an inferable frequency the explicit values have to be stored.
    if inferred is None:
        payload = {"values": obj.values.astype(np.int64)}
    else:
        payload = {"freq": inferred}

    bounds = {
        "start": obj[0],
        "end": obj[-1],
        "min": obj.min(),
        "max": obj.max(),
    }
    return {**payload, **bounds}
def create_cal(trade_dates: pd.DatetimeIndex) -> pd.DataFrame:
    """Build a trading calendar table.

    The calendar covers every calendar day (daily frequency) from the earliest
    to the latest entry of ``trade_dates`` and flags which of those days are
    trading days.

    Args:
        trade_dates (pd.DatetimeIndex): the trading days. Every entry must
            coincide with one of the generated daily timestamps.

    Returns:
        pd.DataFrame: calendar indexed by date with one boolean column,
            ``is_tradeday``. An empty ``trade_dates`` yields an empty
            calendar with the same column.
    """
    if len(trade_dates) == 0:
        # min()/max() of an empty index are NaT, which pd.date_range rejects;
        # an empty calendar is the natural result for "no trading days".
        no_dates = trade_dates[:0]
        return pd.DataFrame(
            {'is_tradeday': pd.Series([], index=no_dates, dtype=bool)}
        )

    min_date = trade_dates.min()
    max_date = trade_dates.max()
    dates = pd.date_range(min_date, max_date)

    df = pd.DataFrame(index=dates)
    # Start with every day marked as non-trading, then switch on the known
    # trading days.
    df['is_tradeday'] = False
    df.loc[trade_dates, 'is_tradeday'] = True
    return df
def _get_features(
    self,
    train_index: pd.DatetimeIndex,
    prediction_length: int,
    custom_features: np.ndarray = None,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Build the feature matrices for the training and the prediction range.

    The time axis starts at the earliest training timestamp and spans
    `train_length + prediction_length` steps at the frequency of
    `train_index`. Depending on the model configuration the features consist
    of a rescaled time-position row (exponential kernel), scaled
    `custom_features` and scaled default seasonal time features (seasonal
    model).

    Parameters
    ----------
    train_index
        Pandas DatetimeIndex of the training range
    prediction_length
        number of time steps to predict
    custom_features
        optional extra features of
        shape: (num_custom_features, train_length + pred_length)

    Returns
    -------
    a tuple of (training, prediction) feature tensors
        shape: (num_features, train_length/pred_length)
    """
    n_train = len(train_index)
    n_total = n_train + prediction_length

    time_axis = pd.date_range(
        start=train_index.min(),
        periods=n_total,
        freq=train_index.freq,
    )
    n_steps = len(time_axis)

    if not self._is_exp_kernel():
        # Uniform seasonal model: no time-position feature, start from an
        # empty (0, n_steps) matrix that later rows are stacked onto.
        stacked = np.empty((0, n_steps))
    else:
        # Position of each time point, 0 .. n_steps - 1, as a single row,
        # rescaled into [-0.5, 0.5] like the seasonal features
        # (see gluonts.time_feature).
        positions = np.arange(n_steps)[np.newaxis, :]
        stacked = positions / (n_total - 1) - 0.5

    if self.use_seasonal_model:
        has_custom = custom_features is not None

        if has_custom:
            assert len(custom_features.shape) == 2, (
                "Custom features should be 2D-array where the rows "
                "represent features and columns the time points."
            )
            assert custom_features.shape[1] == n_total, (
                f"For a seasonal model, feat_dynamic_real must be defined "
                f"for both training and prediction ranges. They are only "
                f"provided for {custom_features.shape[1]} time steps "
                f"instead of {n_total} steps."
            )
            scaled_custom = self.feature_scale * custom_features
            stacked = np.vstack([stacked, scaled_custom])

        if self.use_default_time_features or not has_custom:
            # Default seasonal features derived from the index frequency,
            # limited to the configured number of generators.
            generators = time_features_from_frequency_str(time_axis.freqstr)
            rows = [stacked]
            rows.extend(
                self.feature_scale * make_feature(time_axis)
                for make_feature in generators[:self.num_default_time_features]
            )
            stacked = np.vstack(rows)

    # Columns [0, n_train) belong to training, the remainder to prediction.
    return stacked[:, :n_train], stacked[:, n_train:]