예제 #1
0
 def _compute_ratio(
     self, index: Union[pd.DatetimeIndex, pd.PeriodIndex]
 ) -> np.ndarray:
     """
     Compute how far into its enclosing period each index value falls.

     Parameters
     ----------
     index : {DatetimeIndex, PeriodIndex}
         Index to evaluate. A PeriodIndex is first converted to timestamps.

     Returns
     -------
     ndarray
         Elapsed time since the start of the enclosing ``self._freq``
         period divided by the length of that period, element by element.
     """
     if isinstance(index, pd.PeriodIndex):
         index = index.to_timestamp()
     pi = index.to_period(self._freq)
     period_start = pi.to_timestamp()
     # DatetimeIndex.to_perioddelta was deprecated in pandas 1.1 and later
     # removed; subtracting the period start is the documented equivalent.
     delta = index - period_start
     # Length of each period: start of the following period minus own start.
     gap = (pi + 1).to_timestamp() - period_start
     return to_numpy(delta) / to_numpy(gap)
예제 #2
0
    def in_sample(self) -> pd.DataFrame:
        """
        Build the in-sample frame of deterministic terms, caching the result.

        Returns
        -------
        DataFrame
            The column-wise concatenation of every term's in-sample values
            on ``self._index``, after ``_adjust_dummies`` and
            ``_remove_zeros_ones`` have been applied. When ``self._drop`` is
            truthy, columns are removed so that the number kept equals the
            numerical rank estimated from a pivoted QR decomposition.

        Notes
        -----
        A previously cached frame is returned as-is. When there are no
        deterministic terms an empty (zero-column) frame is returned and
        nothing is cached.
        """
        cached = self._cached_in_sample
        if cached is not None:
            return cached

        index = self._index
        if not self._deterministic_terms:
            return pd.DataFrame(np.empty((index.shape[0], 0)), index=index)

        pieces = [term.in_sample(index) for term in self._deterministic_terms]
        pieces = self._adjust_dummies(pieces)
        terms: pd.DataFrame = pd.concat(pieces, axis=1)
        terms = self._remove_zeros_ones(terms)

        if self._drop:
            values = to_numpy(terms)
            ncol = values.shape[1]
            # Pivoted QR: the magnitudes on R's diagonal give the rank.
            r, pivots = qr(values, mode="r", pivoting=True)
            diag_mag = np.abs(np.diag(r))
            tol = diag_mag[0] * ncol * np.finfo(float).eps
            rank = int(np.sum(diag_mag > tol))
            rpx = r.T @ values

            # Prefer the left-most columns: walk the leading square blocks
            # and keep a column each time it raises the rank of the block.
            selected = [0]
            rank_so_far = 1
            for col in range(1, ncol):
                size = col + 1
                block_rank = np.linalg.matrix_rank(rpx[:size, :size])
                if block_rank > rank_so_far:
                    selected.append(col)
                    rank_so_far = block_rank
                if block_rank == rank:
                    break

            if len(selected) == rank:
                terms = terms.iloc[:, selected]
            else:
                # Left-most scan did not reach the rank; fall back to the
                # QR pivot choice, restored to original column order.
                terms = terms.iloc[:, np.sort(pivots[:rank])]

        self._retain_cols = terms.columns
        self._cached_in_sample = terms
        return terms