def _compute_ratio(
    self, index: Union[pd.DatetimeIndex, pd.PeriodIndex]
) -> np.ndarray:
    """
    Compute how far each observation lies into its enclosing period.

    Parameters
    ----------
    index : {DatetimeIndex, PeriodIndex}
        The observation index. A PeriodIndex is first converted to
        timestamps.

    Returns
    -------
    ndarray
        For each entry, the time elapsed since the start of the period
        (at frequency ``self._freq``) that contains it, divided by the
        length of that period.
    """
    if isinstance(index, pd.PeriodIndex):
        index = index.to_timestamp()
    periods = index.to_period(self._freq)
    period_start = periods.to_timestamp()
    # ``DatetimeIndex.to_perioddelta`` was deprecated in pandas 1.1 and
    # removed in pandas 2.0; subtracting the period start is the
    # replacement recommended by the pandas deprecation notice.
    # NOTE(review): presumably ``index`` is tz-naive here -- confirm.
    delta = index - period_start
    # Length of each enclosing period: next period start minus this one.
    gap = (periods + 1).to_timestamp() - period_start
    return to_numpy(delta) / to_numpy(gap)
def in_sample(self) -> pd.DataFrame:
    """
    Build (or return the cached) in-sample deterministic terms.

    Each entry of ``self._deterministic_terms`` is evaluated on
    ``self._index``; the pieces are passed through
    ``self._adjust_dummies``, concatenated column-wise and passed through
    ``self._remove_zeros_ones``. When ``self._drop`` is truthy, only a
    linearly independent subset of the columns is retained.

    Returns
    -------
    DataFrame
        The deterministic terms. If there are no terms, a frame with
        zero columns and one row per index entry is returned; that
        result is not cached.

    Notes
    -----
    On the non-empty path the retained column labels are stored in
    ``self._retain_cols`` and the result in ``self._cached_in_sample``.
    """
    cached = self._cached_in_sample
    if cached is not None:
        return cached

    index = self._index
    if not self._deterministic_terms:
        return pd.DataFrame(np.empty((index.shape[0], 0)), index=index)

    pieces = [term.in_sample(index) for term in self._deterministic_terms]
    pieces = self._adjust_dummies(pieces)
    terms: pd.DataFrame = pd.concat(pieces, axis=1)
    terms = self._remove_zeros_ones(terms)

    if self._drop:
        values = to_numpy(terms)
        n_cols = values.shape[1]
        qr_out = qr(values, mode="r", pivoting=True)
        r_factor, pivots = qr_out[0], qr_out[-1]

        # Numerical rank: count diagonal entries of R that exceed a
        # tolerance scaled by the leading entry and the column count.
        diag_mag = np.abs(np.diag(r_factor))
        tol = diag_mag[0] * n_cols * np.finfo(float).eps
        rank = int(np.sum(diag_mag > tol))

        projected = r_factor.T @ values

        # Find the left-most columns that produce full rank: a column is
        # selected whenever it raises the rank of the leading square block.
        selected = [0]
        rank_so_far = 1
        for upto in range(2, n_cols + 1):
            block_rank = np.linalg.matrix_rank(projected[:upto, :upto])
            if block_rank > rank_so_far:
                selected.append(upto - 1)
                rank_so_far = block_rank
            if block_rank == rank:
                break

        if len(selected) != rank:
            # The left-to-right scan did not reach the full rank; fall
            # back to the pivot order reported by the QR factorisation.
            selected = np.sort(pivots[:rank])
        terms = terms.iloc[:, selected]

    self._retain_cols = terms.columns
    self._cached_in_sample = terms
    return terms